# Source code for diff_diff.synthetic_control_results

"""
Result container for the classic Synthetic Control Method (SCM) estimator.

This module contains the ``SyntheticControlResults`` dataclass, extracted from
``synthetic_control.py`` to mirror the TROP estimator/results split.

The classic synthetic control of Abadie, Diamond & Hainmueller (2010) produces a
gap path and donor/predictor weights but **no analytical standard error**.
Accordingly ``se``/``t_stat``/``p_value``/``conf_int`` are always NaN on this
object; the point estimate ``att`` (average post-period gap) is the reported
quantity. Significance comes from in-space placebo permutation inference via
:meth:`SyntheticControlResults.in_space_placebo` (a separate ``placebo_p_value``
field, not the NaN ``p_value``).
"""

import warnings
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

from diff_diff.results import _format_survey_block, _get_significance_stars

__all__ = ["SyntheticControlResults"]


@dataclass
class _SyntheticControlFitSnapshot:
    """Panel state retained for post-hoc in-space placebo refits.

    Holds everything ``SyntheticControlResults.in_space_placebo()`` needs to
    refit ANY donor as the pseudo-treated unit without re-reading the original
    DataFrame. Built in ``SyntheticControl.fit()`` and excluded from pickling by
    ``SyntheticControlResults.__getstate__`` (it retains the full treated+donor
    outcome/predictor panel — a privacy/size hazard if serialized).

    ``specs`` is annotated ``List[Any]`` rather than ``List[_PredictorSpec]`` to
    avoid an import cycle (``_PredictorSpec`` lives in ``synthetic_control.py``,
    which imports this module). ``donor_ids`` is an ORDERED list so the placebo
    iteration order — and therefore the rank / p-value — is deterministic.

    The same snapshot is also read by ``SyntheticControlResults.leave_one_out()``.
    All fields are required and positional; do not reorder them.
    """

    # Panel tables keyed by name. NOTE(review): presumably one unit-by-period
    # pivot per outcome/predictor variable — confirm against SyntheticControl.fit().
    pivots: Dict[str, pd.DataFrame]
    # Predictor specifications (``_PredictorSpec`` objects; see the docstring for
    # why the annotation is ``List[Any]``).
    specs: List[Any]
    # NOTE(review): presumably the outcome variable's name — confirm against fit().
    outcome: str
    # Period values: every period, then the pre- and post-treatment subsets. The
    # results methods pass pre_periods / post_periods to ``_mspe`` to score a gap
    # path over each window.
    all_periods: List[Any]
    pre_periods: List[Any]
    post_periods: List[Any]
    # Ordered donor pool; in_space_placebo() iterates it in this order.
    donor_ids: List[Any]
    # The treated unit's reportably-weighted donor support (donor ids with weight above
    # the 1e-6 interpretability floor), FROZEN at fit time and ordered by donor_ids.
    # leave_one_out() iterates this immutable list — NOT the mutable, presentation-level
    # results.donor_weights dict — so post-fit mutation cannot change which donors are
    # dropped, and the robustness result depends only on the fit.
    weighted_donor_ids: List[Any]
    # Identifier of the real treated unit (reported as ``unit`` on the treated
    # row of the placebo table).
    treated_id: Any
    # Estimator settings carried over from the original fit so that refits reuse
    # them. ``n_starts`` is the default multistart count when a diagnostic is
    # called without an ``n_starts`` override.
    standardize: str
    v_method: str
    custom_v: Optional[Any]
    n_starts: int
    seed: Optional[int]
    optimizer_options: Optional[Dict[str, Any]]
    inner_max_iter: int
    inner_min_decrease: float
    # Training/validation split index for v_method="cv" (positional into pre_periods);
    # None → len(pre_periods)//2 default. Carried so in-space/LOO/in-time placebo refits
    # reproduce the same CV split as the treated fit.
    v_cv_t0: Optional[int]


@dataclass
class SyntheticControlResults:
    """
    Results from a classic Synthetic Control Method (SCM) estimation.

    Implements Abadie, Diamond & Hainmueller (2010), "Synthetic Control
    Methods for Comparative Case Studies." A single treated unit's
    counterfactual is the convex combination ``Σ_j w_j · Y_jt`` of donor units
    chosen to match the treated unit's pre-period outcomes and predictors; the
    treatment effect path is the gap ``α̂_1t = Y_1t − Σ_j w_j · Y_jt`` over the
    post periods.

    Attributes
    ----------
    att : float
        Average post-period gap (the reported point estimate). The per-period
        gaps are in ``gap_path``.
    se : float
        Always NaN — classic SCM has no analytical standard error (inference
        is permutation/placebo based; see Abadie-Diamond-Hainmueller 2010
        §2.4).
    t_stat, p_value : float
        Always NaN (no analytical SE).
    conf_int : tuple[float, float]
        Always (NaN, NaN) (no analytical SE).
    n_obs : int
        Number of observations (treated + donor rows over all periods) used.
    n_donors : int
        Number of donor units in the (post-filter) donor pool.
    n_pre_periods : int
        Number of pre-treatment periods.
    n_post_periods : int
        Number of post-treatment periods.
    donor_weights : dict
        Mapping ``{donor_unit_id: weight}`` on the unit simplex. Weights below
        the interpretability floor (1e-6) are dropped.
    v_weights : dict
        Mapping ``{predictor_label: v}`` — the diagonal predictor-importance
        matrix V, trace-normalized to sum to 1. On the degenerate
        **single-donor** path (one donor forces ``w=[1]``) V is unidentified —
        every V yields the same synthetic — so ``v_weights`` is **uniform** for
        every ``v_method`` (including ``cv`` / ``inverse_variance``), with a
        ``UserWarning`` emitted at fit time.
    predictor_balance : pandas.DataFrame
        Predictor-balance table: for each predictor, the treated value, the
        synthetic value (donor-weighted), and the donor-pool mean. Under
        ``v_method="cv"`` the reported ``donor_weights`` come from the ADH-2015
        step-4 refit on the **validation-window** re-aggregated predictors, so
        the ``treated`` / ``synthetic`` / ``donor_mean`` values are reported on
        that same validation-window basis (each spec re-aggregated over
        ``pre[v_cv_t0:]``) — the row's ``predictor`` label remains the full
        spec identity, so it stays aligned with ``v_weights``. For every other
        ``v_method`` the values are the full-pre-period predictor aggregates.
    gap_path : dict
        Mapping ``{period: gap}`` for ALL periods (pre periods carry the fit
        residual used for ``pre_rmspe``; post periods carry the effect path).
    pre_rmspe : float
        Root mean squared prediction error over the pre-treatment periods (the
        primary fit diagnostic).
    mspe_v : float, optional
        The outer-objective value of the selected ``V``: the **pre-period**
        outcome MSPE of ``W*(V*)`` under ``v_method="nested"``, or the held-out
        **validation-window** outcome MSPE under ``v_method="cv"`` (the CV
        selection criterion). None when there is no outer search — the
        ``v_method="custom"`` and ``"inverse_variance"`` paths and the
        degenerate single-donor path. Not comparable across ``v_method``
        values (different objective windows).
    treated_unit : Any
        The treated unit's identifier.
    pre_periods, post_periods : list
        Calendar-sorted pre / post period values.
    v_method : str
        ``"nested"`` (data-driven V), ``"custom"`` (user-supplied V), ``"cv"``
        (out-of-sample cross-validation V), or ``"inverse_variance"``
        (closed-form ``1/Var(X)`` V).
    v_cv_t0 : int, optional
        The training/validation split index actually used under
        ``v_method="cv"`` (the resolved value — equals ``n_pre_periods // 2``
        when the constructor's ``v_cv_t0`` was None). None for every other
        ``v_method``. Survives pickling.
    standardize : str
        ``"std"`` (per-row SD scaling) or ``"none"``.
    alpha : float
        Significance level recorded for downstream (placebo) inference.
    rmspe_ratio : float
        The treated unit's post/pre RMSPE ratio =
        ``sqrt(MSPE_post / MSPE_pre)`` — the in-space placebo test statistic
        (ADH 2010 §2.4), computed at fit time.
    placebo_p_value : float
        In-space placebo permutation p-value (``rank / (n_placebos + 1)``),
        NaN until :meth:`in_space_placebo` is run. SEPARATE from the
        (always-NaN) analytical ``p_value``; ``is_significant`` stays bound to
        ``p_value``.
    n_placebos, n_failed : int
        Donor placebos that entered the permutation reference set / were
        excluded for non-convergence. Both 0 until :meth:`in_space_placebo` is
        run.
    survey_metadata : Any, optional
        Reserved; always None in this release.

    Notes
    -----
    Significance for classic SCM comes from :meth:`in_space_placebo` (opt-in
    in-space placebo permutation inference); :meth:`get_placebo_df` returns the
    per-unit RMSPE-ratio table used for the rank.
    """

    # --- Point estimate and the (always-NaN) analytical inference slots ---
    att: float
    se: float
    t_stat: float
    p_value: float
    conf_int: Tuple[float, float]

    # --- Sample dimensions ---
    n_obs: int
    n_donors: int
    n_pre_periods: int
    n_post_periods: int

    # --- Fitted weights, balance table, gap path and fit diagnostic ---
    donor_weights: Dict[Any, float]
    v_weights: Dict[str, float]
    predictor_balance: pd.DataFrame
    gap_path: Dict[Any, float]
    pre_rmspe: float

    # --- Design description ---
    treated_unit: Any
    pre_periods: List[Any]
    post_periods: List[Any]
    v_method: str
    standardize: str

    # --- Optional fields (defaults follow) ---
    alpha: float = 0.05
    mspe_v: Optional[float] = None
    v_cv_t0: Optional[int] = None
    survey_metadata: Optional[Any] = field(default=None)

    # In-space placebo permutation inference (Abadie-Diamond-Hainmueller 2010,
    # Section 2.4), filled in by ``in_space_placebo()``. Only ``rmspe_ratio``
    # (the treated unit's post/pre RMSPE ratio) is computed at fit time; the
    # other three keep their no-inference defaults until a placebo run.
    # NOTE: the permutation ``placebo_p_value`` is deliberately kept SEPARATE
    # from ``p_value`` (which stays NaN): it is not an analytical p-value, has
    # no SE / t-stat, and does not flow through ``safe_inference``.
    # ``is_significant`` likewise stays bound to the (NaN) ``p_value``, NOT to
    # ``placebo_p_value``.
    placebo_p_value: float = np.nan
    rmspe_ratio: float = np.nan
    n_placebos: int = 0
    n_failed: int = 0

    def __post_init__(self) -> None:
        """Initialise the private, non-field state of a freshly built result.

        These attributes are plain instance attributes rather than dataclass
        fields, so ``dataclasses.fields()`` / ``dataclasses.asdict()`` cannot
        reach the retained panel state. ``fit()`` and the diagnostic methods
        overwrite them per instance.
        """
        # ---- in-space placebo --------------------------------------------
        # ``_fit_snapshot`` (full panel) and ``_placebo_gaps`` (per-unit gap
        # paths) are panel-derived and nulled on pickle by ``__getstate__``;
        # ``_placebo_df`` is the small per-unit aggregate table handed out by
        # ``get_placebo_df()``.
        self._fit_snapshot: Optional[_SyntheticControlFitSnapshot] = None
        self._placebo_gaps: Optional[Dict[Any, Dict[Any, float]]] = None
        self._placebo_df: Optional[pd.DataFrame] = None
        # Did the treated unit's own inner Frank-Wolfe weight solve converge?
        # in_space_placebo() fails closed when this is False, because a
        # truncated treated fit makes the ranked statistic (rmspe_ratio) not a
        # valid SCM optimum.
        self._fit_converged: bool = True
        # Explicit reason an in-space placebo run was infeasible/absent, set by
        # in_space_placebo(). summary() / _scm_native render THIS rather than
        # reconstructing the cause from counts: n_placebos/n_failed alone
        # cannot tell a non-converged treated fit ("treated_fit_nonconverged",
        # n_failed=0) apart from too few donors ("too_few_donors", also
        # n_failed=0). Values: None (not run), "ran",
        # "treated_fit_nonconverged", "too_few_donors", "all_placebos_failed".
        # A small string, so it survives pickling.
        self._placebo_status: Optional[str] = None

        # ---- ADH 2015 §4 robustness diagnostics ----------------------------
        # Opt-in; populated by leave_one_out() / in_time_placebo(). Same
        # panel-vs-scalar split as the in-space placebo: the small per-row
        # tables (_loo_df / _in_time_df), scalar summaries and status strings
        # survive pickling, while the per-refit gap-path dicts (_loo_gaps /
        # _in_time_gaps) are panel-derived and nulled by __getstate__. The
        # analytical se/t/p/ci stay NaN throughout.
        self._loo_df: Optional[pd.DataFrame] = None
        self._loo_gaps: Optional[Dict[Any, Dict[Any, float]]] = None
        # Reason a leave-one-out run was infeasible/absent. Values: None (not
        # run), "ran", "treated_fit_nonconverged", "too_few_donors",
        # "all_refits_failed".
        self._loo_status: Optional[str] = None
        # (min, max) ATT across the successful leave-one-out refits (the
        # absolute spread of counterfactual ATTs); None until run.
        self._loo_att_range: Optional[Tuple[float, float]] = None
        # Headline single-donor-dependence number: max |att_loo - baseline_att|
        # over the successful drops. It is baseline-RELATIVE, so a uniform
        # shift of every drop away from the baseline is NOT masked the way a
        # narrow raw att_range would be. None until run.
        self._loo_max_abs_delta_att: Optional[float] = None
        self._loo_n_failed: int = 0

        self._in_time_df: Optional[pd.DataFrame] = None
        self._in_time_gaps: Optional[Dict[Any, Dict[Any, float]]] = None
        # Reason an in-time placebo run was infeasible/absent. Values: None
        # (not run), "ran", "treated_fit_nonconverged", "too_few_pre_periods",
        # "all_dates_infeasible", "all_dates_failed", "all_dates_unusable" (a
        # mix of failed + infeasible dates with none usable).
        self._in_time_status: Optional[str] = None
        self._in_time_n_failed: int = 0
        # Count of placebo dates that were dimensionally infeasible (too few
        # pre-fake periods, all predictors dropped, or a zero-mass surviving
        # custom_v). Reported next to _in_time_n_failed so a mixed no-success
        # run shows an accurate breakdown.
        self._in_time_n_infeasible: int = 0
def __getstate__(self) -> Dict[str, Any]:
    """Build the pickle state with panel-derived internals blanked out.

    ``_fit_snapshot`` keeps the full treated+donor panel and ``_placebo_gaps``
    the per-unit gap paths. Both are panel-derived and therefore a
    privacy/size hazard if the pickle travels elsewhere. The ADH-2015
    diagnostic gap paths (``_loo_gaps`` / ``_in_time_gaps``) carry the same
    hazard and are dropped too.

    Everything else survives: the scalar placebo fields
    (``placebo_p_value``, ``rmspe_ratio``, ``n_placebos``, ``n_failed``), the
    small ``_placebo_df`` aggregate table, and the ``_loo_df`` /
    ``_in_time_df`` tables with their scalar summaries. A round-tripped result
    still reports each diagnostic, but a call that needs the snapshot (e.g.
    ``in_space_placebo``) raises a ValueError telling the user to re-fit.
    Mirrors ``SyntheticDiDResults``.

    Returns
    -------
    Dict[str, Any]
        A shallow copy of the instance ``__dict__`` with the four
        panel-derived entries set to None. The live object is not modified.
    """
    panel_derived = ("_fit_snapshot", "_placebo_gaps", "_loo_gaps", "_in_time_gaps")
    state = dict(self.__dict__)
    for attr_name in panel_derived:
        state[attr_name] = None
    return state
def __repr__(self) -> str:
    """Return a short one-line description: ATT, pre-RMSPE, donor count, V method."""
    parts = (
        f"ATT={self.att:.4f}",
        f"pre_RMSPE={self.pre_rmspe:.4f}",
        f"n_donors={self.n_donors}",
        f"v_method={self.v_method!r}",
    )
    return "SyntheticControlResults(" + ", ".join(parts) + ")"
@property
def coef_var(self) -> float:
    """Coefficient of variation, ``SE / abs(ATT)``.

    Returns NaN when the SE is non-finite or negative, or when the ATT is
    non-finite or exactly zero. Classic SCM always reports a NaN SE, so this
    is NaN in practice.
    """
    std_err = self.se
    if not np.isfinite(std_err) or std_err < 0:
        return np.nan
    effect = self.att
    if not np.isfinite(effect) or effect == 0:
        return np.nan
    return std_err / abs(effect)


@property
def is_significant(self) -> bool:
    """Whether the analytical ``p_value`` is finite and below ``alpha``.

    Classic SCM produces no analytical p-value (``p_value`` is NaN), so this is
    always False here. The permutation ``placebo_p_value`` is not consulted.
    """
    pval = self.p_value
    if not np.isfinite(pval):
        return False
    return bool(pval < self.alpha)


@property
def significance_stars(self) -> str:
    """Significance stars for the analytical ``p_value`` (empty here — p_value is NaN)."""
    pval = self.p_value
    return _get_significance_stars(pval)
[docs] def summary(self, alpha: Optional[float] = None) -> str: """ Generate a formatted summary of the estimation results. Parameters ---------- alpha : float, optional Significance level; defaults to the alpha used during estimation. Returns ------- str Formatted summary table. """ alpha = alpha or self.alpha n_top = min(5, len(self.donor_weights)) top_donors = sorted(self.donor_weights.items(), key=lambda kv: kv[1], reverse=True)[:n_top] lines = [ "=" * 75, "Synthetic Control Method (SCM) Estimation Results".center(75), "Abadie, Diamond & Hainmueller (2010)".center(75), "=" * 75, "", f"{'Observations:':<28} {self.n_obs:>10}", f"{'Donor units:':<28} {self.n_donors:>10}", f"{'Pre-treatment periods:':<28} {self.n_pre_periods:>10}", f"{'Post-treatment periods:':<28} {self.n_post_periods:>10}", f"{'Treated unit:':<28} {str(self.treated_unit):>10}", "", "-" * 75, "Fit Diagnostics".center(75), "-" * 75, f"{'Pre-treatment RMSPE:':<28} {self.pre_rmspe:>10.4f}", f"{'V selection:':<28} {self.v_method:>10}", f"{'Standardization:':<28} {self.standardize:>10}", ] if self.mspe_v is not None and np.isfinite(self.mspe_v): # Under cv, mspe_v is the held-out VALIDATION-window MSPE (the CV selection # criterion), not the pre-period objective minimized on the nested path. _mspe_label = "Validation MSPE:" if self.v_method == "cv" else "Outer-objective MSPE:" lines.append(f"{_mspe_label:<28} {self.mspe_v:>10.6f}") if self.v_method == "cv" and self.v_cv_t0 is not None: lines.append(f"{'CV train/val split (t0):':<28} {self.v_cv_t0:>10d}") if self.survey_metadata is not None: lines.extend(_format_survey_block(self.survey_metadata, 75)) lines.extend( [ "", "-" * 75, f"{'Top donor weights (w_j)':<40}", "-" * 75, ] ) for unit_id, w in top_donors: lines.append(f"{' ' + str(unit_id):<40} {w:>10.4f}") lines.extend( [ "", "-" * 75, f"{'Parameter':<15} {'Estimate':>12} {'Std. 
Err.':>12} " f"{'t-stat':>10} {'P>|t|':>10}", "-" * 75, f"{'ATT (avg gap)':<15} {self.att:>12.4f} {'n/a':>12} " f"{'n/a':>10} {'n/a':>10}", "-" * 75, "", ] ) # Three states: (1) placebo never run -> point to in_space_placebo(); # (2) run with a valid reference set -> show the permutation p-value; # (3) run but infeasible (no placebo entered the rank, e.g. J<2 or all # donors failed) -> say so explicitly rather than implying it was not run. # ``_placebo_df is not None`` is the "attempted" signal (survives pickling). placebo_attempted = self._placebo_df is not None if placebo_attempted and np.isfinite(self.placebo_p_value): # The classic analytical fields above stay n/a (no SE); this is the # permutation p-value of the post/pre RMSPE ratio, p = rank/(n_placebos+1). lines.extend( [ "In-space placebo permutation inference " "(Abadie-Diamond-Hainmueller 2010, Section 2.4):", f"{' RMSPE ratio (post/pre):':<34} {self.rmspe_ratio:>10.4f}", f"{' Permutation p-value:':<34} {self.placebo_p_value:>10.4f}", f"{' Placebos in reference set:':<34} {self.n_placebos:>10d}" + (f" ({self.n_failed} excluded)" if self.n_failed else ""), "", "(Analytical SE is still undefined for classic SCM; the " "p-value above is permutation-based.)", "=" * 75, ] ) elif placebo_attempted: # Render the SPECIFIC reason recorded by in_space_placebo(); the count # fields (n_placebos=0, n_failed=0) cannot tell a non-converged treated # fit apart from too-few-donors, so do not reconstruct it from counts. status = getattr(self, "_placebo_status", None) if status == "treated_fit_nonconverged": reason = [ "In-space placebo was skipped: the treated unit's own SCM fit " "did not converge at fit time (inner Frank-Wolfe weight solve", "and/or outer V search), so its RMSPE ratio is not a valid " "optimum to rank against placebos. 
placebo_p_value is undefined", "— re-fit with a larger inner_max_iter / looser " "inner_min_decrease and/or a larger optimizer_options['maxiter']", "/ more n_starts.", ] elif status == "too_few_donors": reason = [ "In-space placebo inference requires at least 2 donors (each " "placebo is fit against the other donors); too few were", "available. placebo_p_value is undefined. Inspect " "get_placebo_df().", ] else: # "all_placebos_failed" (or a legacy unpickle without the status) reason = [ "In-space placebo permutation inference was attempted but " "produced no valid reference set", f"(0 placebos entered the rank; {self.n_failed} failed to " "converge). placebo_p_value is undefined — all donor refits", "failed. Inspect get_placebo_df().", ] lines.extend([*reason, "=" * 75]) else: lines.extend( [ "Inference: classic SCM has no analytical standard error.", "Run in_space_placebo() for in-space permutation inference", "(Abadie-Diamond-Hainmueller 2010, Section 2.4).", "=" * 75, ] ) return "\n".join(lines)
def print_summary(self, alpha: Optional[float] = None) -> None:
    """Render :meth:`summary` (forwarding ``alpha``) and write it to stdout."""
    rendered = self.summary(alpha)
    print(rendered)
def to_dict(self) -> Dict[str, Any]:
    """
    Convert scalar results to a dictionary.

    Returns
    -------
    Dict[str, Any]
        Dictionary of the scalar estimation results (weights/balance/gaps
        are available via the ``get_*_df`` accessors). ``conf_int`` is
        flattened into ``conf_int_lower`` / ``conf_int_upper``. When
        ``survey_metadata`` is set, its ``weight_type``, ``effective_n`` and
        ``design_effect`` are appended.
    """
    leading = ("att", "se", "t_stat", "p_value")
    trailing = (
        "n_obs",
        "n_donors",
        "n_pre_periods",
        "n_post_periods",
        "pre_rmspe",
        "mspe_v",
        "treated_unit",
        "v_method",
        "v_cv_t0",
        "standardize",
        # In-space placebo permutation inference. rmspe_ratio is set at fit;
        # placebo_p_value / n_placebos / n_failed stay at their no-inference
        # defaults (NaN / 0) until in_space_placebo() runs.
        "rmspe_ratio",
        "placebo_p_value",
        "n_placebos",
        "n_failed",
    )

    result: Dict[str, Any] = {name: getattr(self, name) for name in leading}
    result["conf_int_lower"] = self.conf_int[0]
    result["conf_int_upper"] = self.conf_int[1]
    for name in trailing:
        result[name] = getattr(self, name)

    meta = self.survey_metadata
    if meta is not None:
        result["weight_type"] = meta.weight_type
        result["effective_n"] = meta.effective_n
        result["design_effect"] = meta.design_effect
    return result
def to_dataframe(self) -> pd.DataFrame:
    """Return the :meth:`to_dict` scalars as a one-row pandas DataFrame."""
    record = self.to_dict()
    return pd.DataFrame([record])
def get_gap_df(self) -> pd.DataFrame:
    """
    Get the gap (effect) path as a DataFrame, in calendar order.

    Rows follow the canonical ``pre_periods + post_periods`` order rather
    than the insertion order of the ``gap_path`` dict. Periods with no entry
    in ``gap_path`` are skipped. A period is labelled ``"post"`` when it
    appears in ``post_periods`` and ``"pre"`` otherwise.

    Returns
    -------
    pandas.DataFrame
        Columns: ``period``, ``gap``, ``phase``.
    """
    ordered_periods = [*self.pre_periods, *self.post_periods]
    records = [
        {
            "period": t,
            "gap": self.gap_path[t],
            "phase": "post" if t in self.post_periods else "pre",
        }
        for t in ordered_periods
        if t in self.gap_path
    ]
    return pd.DataFrame(records, columns=["period", "gap", "phase"])
def get_weights_df(self) -> pd.DataFrame:
    """
    Get donor weights as a DataFrame, sorted by weight descending.

    Donors with equal weight keep their ``donor_weights`` insertion order
    (the sort is stable).

    Returns
    -------
    pandas.DataFrame
        Columns: ``unit``, ``weight``.
    """
    ranked = list(self.donor_weights.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    records = [{"unit": donor, "weight": weight} for donor, weight in ranked]
    return pd.DataFrame(records, columns=["unit", "weight"])
# Column order of the per-unit in-space placebo table. Used as ``columns=`` for
# every DataFrame built by get_placebo_df() and in_space_placebo().
_PLACEBO_COLS = ["unit", "pre_mspe", "post_mspe", "rmspe_ratio", "is_treated", "status"]
def get_placebo_df(self) -> pd.DataFrame:
    """
    Get the in-space placebo distribution as a DataFrame (one row per unit).

    This is a per-unit SUMMARY table, enough to reproduce the permutation rank
    and a ratio-distribution plot. It is NOT the per-period placebo gap paths
    needed for the classic "spaghetti" plot (those are retained internally on
    ``_placebo_gaps`` for the successful placebos).

    Columns: ``unit``, ``pre_mspe``, ``post_mspe``, ``rmspe_ratio``,
    ``is_treated``, ``status`` (``"treated"`` / ``"placebo"`` / ``"failed"``).

    The treated unit is always present as a single
    ``is_treated=True, status="treated"`` row (its ratio is the original
    J-donor fit). After a placebo run **that produced a reference set**
    (``>= 2`` donors AND a converged treated fit), the table has
    ``n_donors + 1`` rows — every donor appears, including those whose refit
    did not converge (``status="failed"`` with NaN metrics, excluded from the
    rank). In the degenerate / fail-closed cases (fewer than 2 donors, or a
    treated fit that did not converge) the placebo loop does not run, so only
    the treated row is returned.

    Populated by :meth:`in_space_placebo`; the summary table is retained on
    pickling, so it is still returned after a round-trip. Before any placebo
    run — including on an unpickled result that never ran one — only the
    treated row is returned.

    Returns
    -------
    pandas.DataFrame
        A fresh copy on every call when a stored table exists; otherwise a
        newly built one-row table for the treated unit.
    """
    stored = self._placebo_df
    if stored is not None:
        return stored.copy()

    # No placebo run yet: synthesise the treated-only row from the fit itself.
    from diff_diff.synthetic_control import _mspe

    treated_row = {
        "unit": self.treated_unit,
        "pre_mspe": _mspe(self.gap_path, self.pre_periods),
        "post_mspe": _mspe(self.gap_path, self.post_periods),
        "rmspe_ratio": self.rmspe_ratio,
        "is_treated": True,
        "status": "treated",
    }
    return pd.DataFrame([treated_row], columns=self._PLACEBO_COLS)
def in_space_placebo(
    self,
    n_starts: Optional[int] = None,
) -> pd.DataFrame:
    """
    In-space placebo permutation inference (Abadie-Diamond-Hainmueller 2010, Section 2.4).

    Reassigns the treatment to each donor in turn, re-estimates a synthetic
    control for that pseudo-treated donor against the OTHER donors, and ranks
    the real treated unit's post/pre RMSPE ratio among all units. Populates
    ``placebo_p_value``, ``n_placebos`` and ``n_failed`` on this object
    (``rmspe_ratio`` — the treated unit's own ratio — is set at fit time) and
    returns the placebo distribution via :meth:`get_placebo_df`.

    The real treated unit is **excluded from every placebo's donor pool**:
    its post-period outcome is treatment-contaminated, so allowing a placebo
    to load weight on it would bias the placebo gap. The ranking set is
    therefore the ``J+1`` units ``{treated} ∪ {J placebos}``, with each
    placebo fit against the other ``J-1`` donors (this matches the standard
    ``SCtools::generate.placebos`` construction). The post/pre RMSPE ratio
    normalizes by pre-treatment fit, which obviates the pre-fit-cutoff
    filtering of ADH Figures 5-7 (journal p. 502), so no pre-fit filter is
    offered — every converged placebo enters the rank.

    The permutation ``placebo_p_value`` is intentionally distinct from
    ``p_value`` (which stays NaN — classic SCM has no analytical SE) and from
    ``is_significant`` (which also stays bound to the NaN ``p_value``).

    A placebo is **excluded** from the reference set (counted in
    ``n_failed``) when its fit is not a valid optimum — EITHER its inner
    Frank-Wolfe weight solve did not converge (a truncated ``W`` is unusable)
    OR its outer ``V`` search did not converge (an under-optimized ``V`` fits
    the pre-period worse, shrinking its RMSPE ratio and biasing the
    permutation p-value anti-conservatively). Each placebo refit **inherits
    the original fit's ``optimizer_options`` / ``n_starts``**, so valid
    inference requires settings adequate for the outer ``V`` search to
    converge: production defaults do; with cheap settings, raise ``n_starts``
    here or re-fit with a larger ``optimizer_options['maxiter']`` (otherwise
    placebos are dropped as failed). The treated unit's own fit is held to the
    same standard — if its inner OR outer search did not converge, the whole
    run fails closed (a ``UserWarning`` is issued, ``placebo_p_value`` is set
    to NaN and only the treated row is returned).

    Parameters
    ----------
    n_starts : int, optional
        Override the multistart count for each placebo's outer V search
        (nested/cv). Default None inherits the original fit's ``n_starts``.
        The placebo loop is the cost driver (one outer V search per donor);
        lower it for a faster, coarser scan.

    Returns
    -------
    pandas.DataFrame
        The placebo distribution (see :meth:`get_placebo_df`).

    Raises
    ------
    ValueError
        If the fit snapshot is unavailable (e.g. this result was unpickled),
        or if ``n_starts`` is given and is not a positive integer.
    """
    snapshot = self._fit_snapshot
    if snapshot is None:
        raise ValueError(
            "in_space_placebo() requires the fit snapshot on the results "
            "object. This result appears to have been loaded from "
            "serialization (which excludes the snapshot) or produced by an "
            "older estimator version. Re-fit to enable in-space placebo "
            "inference."
        )

    from diff_diff.synthetic_control import _mspe, _placebo_fit_unit

    donor_pool = list(snapshot.donor_ids)
    pool_size = len(donor_pool)

    if n_starts is not None:
        # Mirror the estimator constructor's validation (synthetic_control.py)
        # so a bad override fails fast instead of silently coercing (e.g. via
        # int(0)/int(-1)) into a degenerate or invalid permutation procedure.
        override_ok = isinstance(n_starts, (int, np.integer)) and n_starts >= 1
        if not override_ok:
            raise ValueError(f"n_starts override must be a positive integer, got {n_starts!r}")
        starts = int(n_starts)
    else:
        starts = snapshot.n_starts

    treated_ratio = self.rmspe_ratio
    rows: List[Dict[str, Any]] = [
        {
            "unit": snapshot.treated_id,
            "pre_mspe": _mspe(self.gap_path, snapshot.pre_periods),
            "post_mspe": _mspe(self.gap_path, snapshot.post_periods),
            "rmspe_ratio": treated_ratio,
            "is_treated": True,
            "status": "treated",
        }
    ]

    def _record(status: str, p_val: float, n_ok: int, n_bad: int, gaps: Dict[Any, Any]):
        # Single place that writes the run outcome onto the result and hands
        # back a defensive copy of the table built from ``rows`` so far.
        self.placebo_p_value = p_val
        self.n_placebos = n_ok
        self.n_failed = n_bad
        self._placebo_gaps = gaps
        self._placebo_status = status
        self._placebo_df = pd.DataFrame(rows, columns=self._PLACEBO_COLS)
        return self._placebo_df.copy()

    # Fail closed when the treated unit's OWN fit did not converge at fit time
    # (inner Frank-Wolfe weight solve OR outer V search): a statistic from a
    # truncated / under-optimized treated fit is not a valid ADH 2010 §2.4
    # permutation input (placebos already fail closed on non-convergence, so
    # the treated unit must too). ``_fit_converged`` folds both failure modes,
    # hence the remediation names the knobs for each.
    if not self._fit_converged:
        warnings.warn(
            "In-space placebo skipped: the treated unit's own SCM fit did not "
            "converge at fit time (inner Frank-Wolfe weight solve and/or outer V "
            "search), so its RMSPE ratio is not a valid optimum to rank against "
            "placebos. placebo_p_value is NaN — re-fit with a larger "
            "inner_max_iter / looser inner_min_decrease (inner) and/or a larger "
            "optimizer_options['maxiter'] / more n_starts (outer V search).",
            UserWarning,
            stacklevel=2,
        )
        return _record("treated_fit_nonconverged", np.nan, 0, 0, {})

    if pool_size < 2:
        warnings.warn(
            "In-space placebo inference requires at least 2 donors (each "
            f"placebo is fit against the other donors); only {pool_size} "
            "available. placebo_p_value is NaN.",
            UserWarning,
            stacklevel=2,
        )
        return _record("too_few_donors", np.nan, 0, 0, {})

    if pool_size == 2:
        warnings.warn(
            "In-space placebo with 2 donors: each placebo is fit against a "
            "single donor (degenerate weight w=[1]) with no V search, so the "
            "permutation p-value is coarse (only 2 placebos enter the "
            "reference set; the smallest attainable p-value is 1/3).",
            UserWarning,
            stacklevel=2,
        )

    gaps_by_unit: Dict[Any, Dict[Any, float]] = {}
    reference_ratios: List[float] = []
    failures = 0
    for pseudo_treated in donor_pool:
        others = [d for d in donor_pool if d != pseudo_treated]
        outcome = _placebo_fit_unit(snapshot, pseudo_treated, others, starts)
        if outcome is not None:
            unit_gaps, unit_ratio = outcome
            gaps_by_unit[pseudo_treated] = unit_gaps
            pre_val = _mspe(unit_gaps, snapshot.pre_periods)
            post_val = _mspe(unit_gaps, snapshot.post_periods)
            reference_ratios.append(unit_ratio)
            rows.append(
                {
                    "unit": pseudo_treated,
                    "pre_mspe": pre_val,
                    "post_mspe": post_val,
                    "rmspe_ratio": unit_ratio,
                    "is_treated": False,
                    "status": "placebo",
                }
            )
        else:
            # The refit did not produce a usable optimum: exclude it from BOTH
            # the numerator and the denominator (never penalize a truncated
            # solve into the rank). The donor is still recorded with NaN
            # metrics so get_placebo_df() returns the full treated +
            # every-donor unit set.
            failures += 1
            rows.append(
                {
                    "unit": pseudo_treated,
                    "pre_mspe": np.nan,
                    "post_mspe": np.nan,
                    "rmspe_ratio": np.nan,
                    "is_treated": False,
                    "status": "failed",
                }
            )

    n_reference = len(reference_ratios)
    if n_reference > 0:
        # Upper-tail rank on the (unsigned) RMSPE ratio, treated unit included
        # as the "+1". Ties counted via ``>=`` so the p-value is conservative.
        # (The ratio squares the gaps -> direction-agnostic, NOT a signed test.)
        # NOTE(review): if treated_ratio is NaN every ``>=`` comparison is
        # False, so the rank is 1 and the p-value is the smallest attainable.
        # Confirm that fit() can never leave rmspe_ratio as NaN.
        at_least_as_extreme = [r for r in reference_ratios if r >= treated_ratio]
        rank = 1 + len(at_least_as_extreme)
        perm_p = rank / (n_reference + 1)
    else:
        warnings.warn(
            "No in-space placebo entered the reference set (all donors "
            f"failed to converge or were filtered out of {pool_size}); "
            "placebo_p_value is NaN.",
            UserWarning,
            stacklevel=2,
        )
        perm_p = np.nan

    if failures > 0:
        if snapshot.v_method == "cv":
            cv_note = (
                " Under v_method='cv' an excluded refit may instead be STRUCTURALLY "
                "infeasible (the pseudo-treated unit's donor pool is indistinguishable in a "
                "re-aggregated CV window) — remedied by adjusting the predictors, v_cv_t0, "
                "or the donor pool, NOT inner_max_iter / n_starts."
            )
        else:
            cv_note = ""
        warnings.warn(
            f"{failures} of {pool_size} in-space placebos were excluded from the "
            "permutation distribution (the refit did not reach a valid optimum — a "
            "non-converged inner weight solve or outer V search); "
            f"placebo_p_value uses the remaining {n_reference}.{cv_note}",
            UserWarning,
            stacklevel=2,
        )

    final_status = "ran" if n_reference > 0 else "all_placebos_failed"
    return _record(final_status, float(perm_p), int(n_reference), int(failures), gaps_by_unit)
# Column order of the leave-one-out donor-robustness table built by
# leave_one_out(): one "baseline" row plus one row per dropped donor.
# ``delta_att`` is ``att_loo - full_att``.
_LOO_COLS = [
    "dropped_unit",
    "att",
    "pre_rmspe",
    "post_rmspe",
    "rmspe_ratio",
    "delta_att",
    "status",
]
def leave_one_out(self, n_starts: Optional[int] = None) -> pd.DataFrame:
    """
    Leave-one-out donor robustness (Abadie-Diamond-Hainmueller 2015, Section 4).

    Drops each **reportably-weighted** donor, one at a time, and re-fits the
    treated unit's synthetic control against the remaining donor pool. The
    per-drop ATTs reveal whether the estimated effect is driven by any single
    donor (ADH 2015 overlay the leave-one-out counterfactual trajectories for
    this purpose; :meth:`get_leave_one_out_gaps` returns those paths). This is a
    thin re-run of the validated SCM solver — it has **no analytical standard
    error**; ``se``/``t_stat``/``p_value``/``conf_int`` and ``is_significant``
    are unaffected (still bound to the NaN analytical ``p_value``).

    The drop set is exactly the donors in ``donor_weights`` — those above the
    ``1e-6`` interpretability floor (``synthetic_control._MIN_REPORT_WEIGHT``).
    A donor with negligible weight ``0 < w ≤ 1e-6`` is excluded (its removal
    moves the ATT by ~the weight, so its ``delta_att`` would be ~0 — an
    uninformative row), keeping the LOO table aligned with the reported
    support; a zero-weight donor's removal leaves the synthetic unchanged.
    (This ``1e-6`` approximation of "positive weight" is documented in REGISTRY
    §SyntheticControl.) A donor that carries ALL the weight is still dropped
    (the others absorb its mass on re-fit); its large ``delta_att`` is exactly
    the single-donor-dependence signal this diagnostic exists to surface, NOT a
    failure.

    Every exit path (including the baseline-only early exits) overwrites the
    cached ``_loo_status`` / ``_loo_att_range`` / ``_loo_max_abs_delta_att`` /
    ``_loo_n_failed`` / ``_loo_gaps`` / ``_loo_df`` fields together, so they
    always describe the same run.

    Parameters
    ----------
    n_starts : int, optional
        Override the multistart count for each leave-one-out refit's outer V
        search (nested/cv). Default None inherits the original fit's
        ``n_starts``.

    Returns
    -------
    pandas.DataFrame
        One ``status="baseline"`` row (the full fit, ``delta_att=0``) followed
        by one row per dropped donor (``status="loo"``, or ``"failed"`` with NaN
        metrics when its refit did not converge), sorted by ``|delta_att|``
        descending (failed rows last). Columns: ``dropped_unit``, ``att``,
        ``pre_rmspe``, ``post_rmspe``, ``rmspe_ratio``, ``delta_att``
        (``att_loo - full_att``), ``status``. A copy of the cached table is
        returned.

    Raises
    ------
    ValueError
        If the fit snapshot is unavailable (e.g. this result was unpickled), or
        if ``n_starts`` is given and is not a positive integer.

    Warns
    -----
    UserWarning
        When the treated unit's own fit did not converge or fewer than 2 donors
        are available (baseline-only table returned), and when one or more
        leave-one-out refits failed.
    """
    if self._fit_snapshot is None:
        raise ValueError(
            "leave_one_out() requires the fit snapshot on the results object. "
            "This result appears to have been loaded from serialization (which "
            "excludes the snapshot) or produced by an older estimator version. "
            "Re-fit to enable leave-one-out donor robustness."
        )

    # Function-scope import: synthetic_control.py imports this module, so a
    # top-level import would be circular.
    from diff_diff.synthetic_control import _mspe, _placebo_fit_unit

    snap = self._fit_snapshot
    if n_starts is None:
        n_starts_eff = snap.n_starts
    else:
        # Mirror the estimator constructor's validation so a bad override fails
        # fast instead of silently coercing into a degenerate refit (cf.
        # in_space_placebo()).
        if not isinstance(n_starts, (int, np.integer)) or n_starts < 1:
            raise ValueError(f"n_starts override must be a positive integer, got {n_starts!r}")
        n_starts_eff = int(n_starts)

    # Reference ATT, converted once and reused for every delta below.
    full_att = float(self.att)

    # Baseline row: read DIRECTLY from the full fit (do NOT re-fit), so the
    # reference ATT — and therefore delta_att=0.0 — is exact.
    baseline_row = {
        "dropped_unit": None,
        "att": full_att,
        "pre_rmspe": float(self.pre_rmspe),
        "post_rmspe": float(np.sqrt(_mspe(self.gap_path, snap.post_periods))),
        "rmspe_ratio": float(self.rmspe_ratio),
        "delta_att": 0.0,
        "status": "baseline",
    }

    def _publish_baseline_only(status: str) -> pd.DataFrame:
        """Cache a baseline-only outcome under ``status`` and return a copy of the table."""
        self._loo_status = status
        self._loo_att_range = None
        # Reset alongside the other summaries so no exit path leaves this field
        # out of step with the rest of the cached leave-one-out state.
        self._loo_max_abs_delta_att = None
        self._loo_n_failed = 0
        self._loo_gaps = {}
        self._loo_df = pd.DataFrame([baseline_row], columns=self._LOO_COLS)
        return self._loo_df.copy()

    # Fail closed when the treated unit's own fit did not converge: a truncated /
    # under-optimized baseline ATT makes every leave-one-out delta meaningless.
    # (The warning stays in this frame so stacklevel=2 still points at the caller.)
    if not self._fit_converged:
        warnings.warn(
            "Leave-one-out skipped: the treated unit's own SCM fit did not "
            "converge at fit time (inner Frank-Wolfe weight solve and/or outer V "
            "search), so the baseline ATT is not a valid optimum to compare "
            "leave-one-out refits against. Re-fit with a larger inner_max_iter / "
            "looser inner_min_decrease (inner) and/or a larger "
            "optimizer_options['maxiter'] / more n_starts (outer V search).",
            UserWarning,
            stacklevel=2,
        )
        return _publish_baseline_only("treated_fit_nonconverged")

    # Dropping any donor requires at least one donor left in the pool.
    if len(snap.donor_ids) < 2:
        warnings.warn(
            "Leave-one-out donor robustness requires at least 2 donors (dropping "
            f"one must leave a non-empty pool); only {len(snap.donor_ids)} "
            "available. Returning the baseline fit only.",
            UserWarning,
            stacklevel=2,
        )
        return _publish_baseline_only("too_few_donors")

    # Drop the FROZEN reportably-weighted support captured at fit time (donor ids
    # with weight above the 1e-6 floor, in donor_ids order). Reading the snapshot —
    # NOT the mutable presentation-level self.donor_weights — makes the result
    # depend only on the fit and immune to post-fit mutation of donor_weights.
    pos_donors = list(snap.weighted_donor_ids)

    loo_gaps: Dict[Any, Dict[Any, float]] = {}
    loo_rows: List[Dict[str, Any]] = []
    atts: List[float] = []
    n_failed = 0
    for d in pos_donors:
        pool = [x for x in snap.donor_ids if x != d]
        fitted = _placebo_fit_unit(snap, snap.treated_id, pool, n_starts_eff)
        if fitted is None:
            # The helper signals an unusable refit with None; keep the donor in
            # the table with NaN metrics so the failure is visible.
            n_failed += 1
            loo_rows.append(
                {
                    "dropped_unit": d,
                    "att": np.nan,
                    "pre_rmspe": np.nan,
                    "post_rmspe": np.nan,
                    "rmspe_ratio": np.nan,
                    "delta_att": np.nan,
                    "status": "failed",
                }
            )
            continue
        gap_path_d, ratio_d = fitted
        loo_gaps[d] = gap_path_d
        att_d = float(np.mean([gap_path_d[p] for p in snap.post_periods]))
        atts.append(att_d)
        loo_rows.append(
            {
                "dropped_unit": d,
                "att": att_d,
                "pre_rmspe": float(np.sqrt(_mspe(gap_path_d, snap.pre_periods))),
                "post_rmspe": float(np.sqrt(_mspe(gap_path_d, snap.post_periods))),
                "rmspe_ratio": ratio_d,
                "delta_att": att_d - full_att,
                "status": "loo",
            }
        )

    # Sort successful drops by |delta_att| desc (most influential donor first);
    # non-converged drops sort last.
    finite_rows = sorted(
        (r for r in loo_rows if r["status"] == "loo"),
        key=lambda r: abs(r["delta_att"]),
        reverse=True,
    )
    failed_rows = [r for r in loo_rows if r["status"] == "failed"]
    ordered = [baseline_row] + finite_rows + failed_rows

    if n_failed > 0:
        cv_note = (
            " Under v_method='cv' a 'failed' drop may instead be STRUCTURALLY "
            "infeasible (the reduced donor pool is indistinguishable in a re-aggregated "
            "CV window) — remedied by adjusting the predictors, v_cv_t0, or the donor "
            "pool, NOT inner_max_iter / n_starts."
            if snap.v_method == "cv"
            else ""
        )
        warnings.warn(
            f"{n_failed} of {len(pos_donors)} leave-one-out refits were excluded with "
            "NaN metrics (status='failed'; the refit did not reach a valid optimum — a "
            "non-converged inner weight solve or outer V search); the ATT range uses "
            f"the remaining refits.{cv_note}",
            UserWarning,
            stacklevel=2,
        )

    self._loo_gaps = loo_gaps
    self._loo_n_failed = int(n_failed)
    self._loo_att_range = (min(atts), max(atts)) if atts else None
    # Baseline-relative headline: the largest swing of any single donor-drop from
    # the full-fit ATT (max |delta_att|). Robust to a uniform shift that a raw
    # att_range would understate.
    self._loo_max_abs_delta_att = max(abs(a - full_att) for a in atts) if atts else None
    # Distinguish a real run from "every donor-drop refit failed to converge"
    # (no valid leave-one-out estimate produced) so DR/BR do not report an empty
    # diagnostic as completed. (pos_donors empty — a converged fit always has >=1
    # positive weight — falls through to "ran": baseline-only, benign.)
    self._loo_status = "all_refits_failed" if (pos_donors and not atts) else "ran"
    self._loo_df = pd.DataFrame(ordered, columns=self._LOO_COLS)
    return self._loo_df.copy()
def get_leave_one_out_df(self) -> pd.DataFrame:
    """
    Return the cached leave-one-out donor-robustness table.

    The table is the one produced by the most recent :meth:`leave_one_out`
    call. It survives pickling. A copy is handed back, so mutating the
    returned frame does not touch the cached one.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If :meth:`leave_one_out` has not been run.
    """
    cached = self._loo_df
    if cached is not None:
        return cached.copy()
    raise ValueError("No leave-one-out results yet; call leave_one_out() first.")
def get_leave_one_out_gaps(self) -> pd.DataFrame:
    """
    Long-form leave-one-out gap paths, for the overlay ("spaghetti") plot.

    One row per (dropped donor, period) for every converged leave-one-out
    refit. Columns: ``dropped_unit``, ``period``, ``gap``, ``phase``
    (``"pre"``/``"post"``) — mirroring :meth:`get_gap_df`. Periods are emitted
    in ``pre_periods`` then ``post_periods`` order, and a period missing from a
    refit's gap path is skipped. These per-period paths are panel-derived and
    are NOT retained after pickling.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If :meth:`leave_one_out` has not been run, or if the gap paths were
        dropped on pickling (re-fit and re-run to recompute them).
    """
    if self._loo_df is None:
        raise ValueError("No leave-one-out results yet; call leave_one_out() first.")
    if self._loo_gaps is None:
        raise ValueError(
            "Leave-one-out gap paths are not retained after pickling "
            "(panel-derived); re-run leave_one_out() on a freshly fitted result "
            "to recompute them."
        )

    # The timeline and each period's phase label do not depend on the dropped
    # donor, so build them once instead of once per unit / once per row.
    post_periods = list(self.post_periods)
    post_lookup = set(post_periods)
    labelled_timeline = [
        (period, "post" if period in post_lookup else "pre")
        for period in list(self.pre_periods) + post_periods
    ]

    rows: List[Dict[str, Any]] = []
    for unit, gap_path in self._loo_gaps.items():
        for period, phase in labelled_timeline:
            if period not in gap_path:
                continue
            rows.append(
                {
                    "dropped_unit": unit,
                    "period": period,
                    "gap": gap_path[period],
                    "phase": phase,
                }
            )
    return pd.DataFrame(rows, columns=["dropped_unit", "period", "gap", "phase"])
# Column order of the in-time placebo table built by ``in_time_placebo()``;
# passed as ``columns=`` to every DataFrame that method constructs.
_IN_TIME_COLS = [
    "placebo_period",
    "placebo_att",
    "pre_fit_rmspe",
    "rmspe_ratio",
    "n_pre_fake",
    "n_post_fake",
    "n_dropped_specs",
    "status",
]
def in_time_placebo(
    self,
    placebo_periods: Optional[Any] = None,
    n_starts: Optional[int] = None,
) -> pd.DataFrame:
    """
    In-time (backdating) placebo (Abadie-Diamond-Hainmueller 2015, Section 4).

    Reassigns the intervention to an earlier pre-treatment date ``t_f`` and
    re-fits the synthetic control using ONLY pre-``t_f`` information, then
    measures the "effect" over the held-out window ``[t_f, T0)``. A credible
    synthetic control should show **no spurious gap** there (ADH 2015 Figure 4,
    German reunification backdated to 1975). This is a thin re-run of the
    validated SCM solver — it has **no analytical standard error**;
    ``se``/``t_stat``/``p_value``/``conf_int`` and ``is_significant`` are
    unaffected.

    **Windowing convention (TRUNCATE).** The placebo fit uses only periods
    strictly before ``t_f``: pre-period-outcome predictors become the
    pre-``t_f`` outcomes, and covariate / special predictor windows are
    intersected with the pre-``t_f`` window. A predictor window lying ENTIRELY
    in the held-out region ``[t_f, T0)`` is dropped (surfaced in
    ``n_dropped_specs`` + an aggregated warning). For outcome-predictor fits
    this equals the literal "lag the predictors" re-run of a manual
    ``Synth::synth`` (R has no in-time-placebo function); see
    ``docs/methodology/REGISTRY.md`` for the recognized deviation note.

    Every exit path (including the skip exits) overwrites the cached
    ``_in_time_status`` / ``_in_time_n_failed`` / ``_in_time_n_infeasible`` /
    ``_in_time_gaps`` / ``_in_time_df`` fields together, so they always
    describe the same run.

    Parameters
    ----------
    placebo_periods : period value or list of period values, optional
        The pseudo-intervention date(s), each a member of ``pre_periods``.
        Default None sweeps every feasible interior pre-date (at least 2
        pre-fake periods to fit + at least 1 post-fake period to measure the
        gap). A date that is a true post-treatment period, or not a pre-period
        at all, raises ``ValueError``; a valid pre-date that is dimensionally
        infeasible (too few pre-fake periods, or all predictors dropped) yields
        a ``status="infeasible"`` row (no raise). Explicit dates are
        de-duplicated and processed in pre-period order.
    n_starts : int, optional
        Override the multistart count for each placebo refit's outer V search
        (nested/cv). Default None inherits the original fit's ``n_starts``.

    Returns
    -------
    pandas.DataFrame
        One row per placebo date. Columns: ``placebo_period``, ``placebo_att``
        (mean gap over the held-out window — should be ~0 if no real
        pre-period effect), ``pre_fit_rmspe``, ``rmspe_ratio``
        (post-fake/pre-fake), ``n_pre_fake``, ``n_post_fake``,
        ``n_dropped_specs``, ``status`` (``"ran"`` / ``"infeasible"`` /
        ``"failed"``). An empty table (columns only) is returned when the
        placebo is skipped. A copy of the cached table is returned.

    Raises
    ------
    ValueError
        If the fit snapshot is unavailable (e.g. this result was unpickled), if
        ``n_starts`` is given and is not a positive integer, if
        ``placebo_periods`` is an explicit but empty container, or if an
        explicit ``placebo_periods`` entry is a post-treatment period / not a
        pre-period.

    Warns
    -----
    UserWarning
        When the placebo is skipped (treated fit not converged, or fewer than 3
        pre-periods), when predictors were dropped by truncation, and when any
        date was infeasible or failed to converge.
    """
    if self._fit_snapshot is None:
        raise ValueError(
            "in_time_placebo() requires the fit snapshot on the results object. "
            "This result appears to have been loaded from serialization (which "
            "excludes the snapshot) or produced by an older estimator version. "
            "Re-fit to enable the in-time placebo."
        )

    # Function-scope import: synthetic_control.py imports this module, so a
    # top-level import would be circular.
    from diff_diff.synthetic_control import (
        _mspe,
        _placebo_fit_unit,
        _truncate_snapshot_in_time,
    )

    snap = self._fit_snapshot
    if n_starts is None:
        n_starts_eff = snap.n_starts
    else:
        if not isinstance(n_starts, (int, np.integer)) or n_starts < 1:
            raise ValueError(f"n_starts override must be a positive integer, got {n_starts!r}")
        n_starts_eff = int(n_starts)

    pre = list(snap.pre_periods)

    def _publish_skipped(status: str) -> pd.DataFrame:
        """Cache an empty (skipped) outcome under ``status`` and return a copy of the table."""
        empty = pd.DataFrame([], columns=self._IN_TIME_COLS)
        self._in_time_status = status
        self._in_time_n_failed = 0
        # Reset alongside the failure counter so no exit path leaves this field
        # out of step with the rest of the cached in-time state.
        self._in_time_n_infeasible = 0
        self._in_time_gaps = {}
        self._in_time_df = empty
        return empty.copy()

    # Fail closed when the treated unit's own fit did not converge: a truncated /
    # under-optimized baseline makes the placebo comparison meaningless.
    # (Warnings stay in this frame so stacklevel=2 still points at the caller.)
    if not self._fit_converged:
        warnings.warn(
            "In-time placebo skipped: the treated unit's own SCM fit did not "
            "converge at fit time (inner Frank-Wolfe weight solve and/or outer V "
            "search). Re-fit with a larger inner_max_iter / looser "
            "inner_min_decrease (inner) and/or a larger optimizer_options['maxiter'] "
            "/ more n_starts (outer V search).",
            UserWarning,
            stacklevel=2,
        )
        return _publish_skipped("treated_fit_nonconverged")

    # A feasible date needs >=2 pre-fake + >=1 post-fake period -> >=3 pre periods.
    # The >=2 pre-fake rule is a deliberate Note-documented restriction (an auto-
    # swept single-pre-fake placebo is a non-credible pre-fit; see REGISTRY).
    if len(pre) < 3:
        warnings.warn(
            "In-time placebo requires at least 3 pre-treatment periods (a feasible "
            "placebo date needs >=2 pre-fake periods to fit and >=1 post-fake period "
            f"to measure the gap); only {len(pre)} available.",
            UserWarning,
            stacklevel=2,
        )
        return _publish_skipped("too_few_pre_periods")

    if placebo_periods is None:
        # Sweep every feasible pre-date (positional: idx>=2 gives >=2 pre-fake +
        # >=1 post-fake; idx<2 would leave fewer than 2 pre-fake periods).
        dates: List[Any] = pre[2:]
    else:
        if isinstance(placebo_periods, (list, tuple, set, np.ndarray, pd.Index, pd.Series)):
            dates = list(placebo_periods)
        else:
            dates = [placebo_periods]
        # An explicit but EMPTY container is a malformed request (NOT "every date
        # was infeasible") — fail fast, consistent with the post-date / non-pre
        # date raises below. Pass None to sweep all feasible pre-dates.
        if not dates:
            raise ValueError(
                "placebo_periods is empty; pass None to sweep all feasible "
                "pre-dates, or a non-empty list of pre-period date(s)."
            )
        pre_set = set(pre)
        post_set = set(snap.post_periods)
        for d in dates:
            if d in post_set:
                raise ValueError(
                    f"placebo_period {d!r} is a true post-treatment period; an "
                    "in-time placebo date must lie in the pre-treatment window."
                )
            if d not in pre_set:
                raise ValueError(
                    f"placebo_period {d!r} is not a pre-treatment period "
                    f"(pre_periods = {pre})."
                )
        # De-duplicate + canonicalize to pre-period order (mirrors _resolve_periods):
        # duplicate / unordered explicit dates must not trigger duplicate refits or
        # inflate n_dates.
        _requested = set(dates)
        dates = [p for p in pre if p in _requested]

    # Metric cells shared by every row that did not produce an estimate.
    nan_metrics = {"placebo_att": np.nan, "pre_fit_rmspe": np.nan, "rmspe_ratio": np.nan}

    in_time_gaps: Dict[Any, Dict[Any, float]] = {}
    rows: List[Dict[str, Any]] = []
    dropped_all: set = set()
    n_failed = 0
    n_infeasible = 0
    n_ran = 0
    for t_f in dates:
        idx = pre.index(t_f)
        snap_mod, dropped = _truncate_snapshot_in_time(snap, t_f)
        dropped_all.update(dropped)
        # Window sizes and drop count are reported on every row, whatever its status.
        # Column order is fixed later by ``columns=self._IN_TIME_COLS``.
        shape = {
            "placebo_period": t_f,
            "n_pre_fake": idx,
            "n_post_fake": len(pre) - idx,
            "n_dropped_specs": len(dropped),
        }

        if snap_mod is None:
            # Truncation produced no usable snapshot for this date.
            n_infeasible += 1
            rows.append({**shape, **nan_metrics, "status": "infeasible"})
            continue

        fitted = _placebo_fit_unit(snap_mod, snap.treated_id, snap.donor_ids, n_starts_eff)
        if fitted is None:
            n_failed += 1
            rows.append({**shape, **nan_metrics, "status": "failed"})
            continue

        gap_path, ratio = fitted
        in_time_gaps[t_f] = gap_path
        placebo_att = float(np.mean([gap_path[p] for p in snap_mod.post_periods]))
        rows.append(
            {
                **shape,
                "placebo_att": placebo_att,
                "pre_fit_rmspe": float(np.sqrt(_mspe(gap_path, snap_mod.pre_periods))),
                "rmspe_ratio": ratio,
                "status": "ran",
            }
        )
        n_ran += 1

    if dropped_all:
        warnings.warn(
            "In-time placebo (TRUNCATE convention): predictor(s) "
            f"{sorted(map(str, dropped_all))} fell entirely in the held-out "
            "post-fake window for some placebo date(s) and were dropped from those "
            "refits (see the n_dropped_specs column).",
            UserWarning,
            stacklevel=2,
        )
    if n_infeasible > 0:
        warnings.warn(
            f"{n_infeasible} in-time placebo date(s) were structurally infeasible "
            "(too few pre-fake periods, all predictors dropped, or — under "
            "v_method='cv' — a kept predictor no longer spans both windows, or a "
            "re-aggregated window loses cross-donor variation, after truncation) and "
            "are reported with status='infeasible' (NaN metrics).",
            UserWarning,
            stacklevel=2,
        )
    if n_failed > 0:
        warnings.warn(
            f"{n_failed} in-time placebo refit(s) failed to converge and are "
            "reported with status='failed' (NaN metrics).",
            UserWarning,
            stacklevel=2,
        )

    self._in_time_gaps = in_time_gaps
    self._in_time_n_failed = int(n_failed)
    self._in_time_n_infeasible = int(n_infeasible)
    # When no date ran, classify the cause precisely so the downstream reason text
    # is never false: a pure convergence failure ("all_dates_failed", actionable —
    # raise n_starts / loosen tolerances) and pure dimensional infeasibility
    # ("all_dates_infeasible", structural) are distinct; a MIX of both gets its own
    # "all_dates_unusable" code (both counters are surfaced) rather than being
    # mislabeled as exclusively one or the other.
    if n_ran > 0:
        self._in_time_status = "ran"
    elif n_failed > 0 and n_infeasible > 0:
        self._in_time_status = "all_dates_unusable"
    elif n_failed > 0:
        self._in_time_status = "all_dates_failed"
    else:
        self._in_time_status = "all_dates_infeasible"
    self._in_time_df = pd.DataFrame(rows, columns=self._IN_TIME_COLS)
    return self._in_time_df.copy()
def get_in_time_placebo_df(self) -> pd.DataFrame:
    """
    Return the cached in-time placebo table.

    The table is the one produced by the most recent :meth:`in_time_placebo`
    call. It survives pickling. A copy is handed back, so mutating the
    returned frame does not touch the cached one.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If :meth:`in_time_placebo` has not been run.
    """
    cached = self._in_time_df
    if cached is not None:
        return cached.copy()
    raise ValueError("No in-time placebo results yet; call in_time_placebo() first.")
def get_in_time_placebo_gaps(self) -> pd.DataFrame:
    """
    Long-form in-time placebo gap paths, for the backdating overlay plot.

    One row per (placebo date, period) for every converged in-time refit.
    Columns: ``placebo_period``, ``period``, ``gap``, ``phase`` (``"pre_fake"``
    for periods before the placebo date, ``"post_fake"`` for the held-out
    window from it on). Only periods in ``pre_periods`` are emitted, in that
    order, and a period missing from a refit's gap path is skipped. These
    per-period paths are panel-derived and are NOT retained after pickling.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If :meth:`in_time_placebo` has not been run, or if the gap paths were
        dropped on pickling (re-fit and re-run to recompute them).
    """
    if self._in_time_df is None:
        raise ValueError("No in-time placebo results yet; call in_time_placebo() first.")
    if self._in_time_gaps is None:
        raise ValueError(
            "In-time placebo gap paths are not retained after pickling "
            "(panel-derived); re-run in_time_placebo() on a freshly fitted result "
            "to recompute them."
        )

    pre = list(self.pre_periods)
    # Position of each pre-period, computed once. ``setdefault`` keeps the FIRST
    # occurrence, matching what ``list.index`` would return, without re-scanning
    # the list for every emitted row.
    first_pos: Dict[Any, int] = {}
    for pos, period in enumerate(pre):
        first_pos.setdefault(period, pos)

    rows: List[Dict[str, Any]] = []
    for t_f, gap_path in self._in_time_gaps.items():
        # One scan per placebo date; ``list.index`` is kept here so an unknown
        # date still raises ValueError.
        split = pre.index(t_f)
        for period in pre:
            if period not in gap_path:
                continue
            phase = "post_fake" if first_pos[period] >= split else "pre_fake"
            rows.append(
                {
                    "placebo_period": t_f,
                    "period": period,
                    "gap": gap_path[period],
                    "phase": phase,
                }
            )
    return pd.DataFrame(rows, columns=["placebo_period", "period", "gap", "phase"])