"""
Result container for the classic Synthetic Control Method (SCM) estimator.
This module contains the ``SyntheticControlResults`` dataclass, extracted from
``synthetic_control.py`` to mirror the TROP estimator/results split.
The classic synthetic control of Abadie, Diamond & Hainmueller (2010) produces a
gap path and donor/predictor weights but **no analytical standard error**.
Accordingly ``se``/``t_stat``/``p_value``/``conf_int`` are always NaN on this
object; the point estimate ``att`` (average post-period gap) is the reported
quantity. Significance comes from in-space placebo permutation inference via
:meth:`SyntheticControlResults.in_space_placebo` (a separate ``placebo_p_value``
field, not the NaN ``p_value``).
"""
import warnings
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
from diff_diff.results import _format_survey_block, _get_significance_stars
__all__ = ["SyntheticControlResults"]
@dataclass
class _SyntheticControlFitSnapshot:
    """Panel state retained for post-hoc in-space placebo refits.

    Holds everything ``SyntheticControlResults.in_space_placebo()`` needs to
    refit ANY donor as the pseudo-treated unit without re-reading the original
    DataFrame. Built in ``SyntheticControl.fit()`` and excluded from pickling by
    ``SyntheticControlResults.__getstate__`` (it retains the full treated+donor
    outcome/predictor panel — a privacy/size hazard if serialized).

    ``specs`` is annotated ``List[Any]`` rather than ``List[_PredictorSpec]`` to
    avoid an import cycle (``_PredictorSpec`` lives in ``synthetic_control.py``,
    which imports this module). ``donor_ids`` is an ORDERED list so the placebo
    iteration order — and therefore the rank / p-value — is deterministic.
    """

    # Retained panel data, keyed by string.
    # NOTE(review): presumably one pivoted frame per variable — confirm against
    # SyntheticControl.fit(), which builds this object.
    pivots: Dict[str, pd.DataFrame]
    # Predictor specifications (``_PredictorSpec`` instances; typed Any to dodge
    # the import cycle described in the class docstring).
    specs: List[Any]
    # NOTE(review): presumably the outcome column name — confirm against fit().
    outcome: str
    all_periods: List[Any]
    # Pre / post windows; in_space_placebo() passes these to ``_mspe`` for both
    # the treated gap path and every placebo gap path.
    pre_periods: List[Any]
    post_periods: List[Any]
    # Ordered donor pool iterated by in_space_placebo().
    donor_ids: List[Any]
    # The treated unit's reportably-weighted donor support (donor ids with weight above
    # the 1e-6 interpretability floor), FROZEN at fit time and ordered by donor_ids.
    # leave_one_out() iterates this immutable list — NOT the mutable, presentation-level
    # results.donor_weights dict — so post-fit mutation cannot change which donors are
    # dropped, and the robustness result depends only on the fit.
    weighted_donor_ids: List[Any]
    # Identifier of the treated unit; becomes the ``unit`` of the treated row in
    # the placebo table.
    treated_id: Any
    standardize: str
    # V-selection method of the original fit; in_space_placebo() checks it
    # against "cv" to decide whether to append the CV-specific warning note.
    v_method: str
    custom_v: Optional[Any]
    # Multistart count of the original fit; inherited by in_space_placebo()
    # when the caller passes no ``n_starts`` override.
    n_starts: int
    seed: Optional[int]
    optimizer_options: Optional[Dict[str, Any]]
    inner_max_iter: int
    inner_min_decrease: float
    # Training/validation split index for v_method="cv" (positional into pre_periods);
    # None → len(pre_periods)//2 default. Carried so in-space/LOO/in-time placebo refits
    # reproduce the same CV split as the treated fit.
    v_cv_t0: Optional[int]
[docs]
@dataclass
class SyntheticControlResults:
"""
Results from a classic Synthetic Control Method (SCM) estimation.
Implements Abadie, Diamond & Hainmueller (2010), "Synthetic Control Methods
for Comparative Case Studies." A single treated unit's counterfactual is the
convex combination ``Σ_j w_j · Y_jt`` of donor units chosen to match the
treated unit's pre-period outcomes and predictors; the treatment effect path
is the gap ``α̂_1t = Y_1t − Σ_j w_j · Y_jt`` over the post periods.
Attributes
----------
att : float
Average post-period gap (the reported point estimate). The per-period
gaps are in ``gap_path``.
se : float
Always NaN — classic SCM has no analytical standard error (inference is
permutation/placebo based; see Abadie-Diamond-Hainmueller 2010 §2.4).
t_stat, p_value : float
Always NaN (no analytical SE).
conf_int : tuple[float, float]
Always (NaN, NaN) (no analytical SE).
n_obs : int
Number of observations (treated + donor rows over all periods) used.
n_donors : int
Number of donor units in the (post-filter) donor pool.
n_pre_periods : int
Number of pre-treatment periods.
n_post_periods : int
Number of post-treatment periods.
donor_weights : dict
Mapping ``{donor_unit_id: weight}`` on the unit simplex. Weights below
the interpretability floor (1e-6) are dropped.
v_weights : dict
Mapping ``{predictor_label: v}`` — the diagonal predictor-importance
matrix V, trace-normalized to sum to 1. On the degenerate **single-donor**
path (one donor forces ``w=[1]``) V is unidentified — every V yields the same
synthetic — so ``v_weights`` is **uniform** for every ``v_method`` (including
``cv`` / ``inverse_variance``), with a ``UserWarning`` emitted at fit time.
predictor_balance : pandas.DataFrame
Predictor-balance table: for each predictor, the treated value, the
synthetic value (donor-weighted), and the donor-pool mean. Under
``v_method="cv"`` the reported ``donor_weights`` come from the ADH-2015 step-4
refit on the **validation-window** re-aggregated predictors, so the ``treated`` /
``synthetic`` / ``donor_mean`` values are reported on that same validation-window
basis (each spec re-aggregated over ``pre[v_cv_t0:]``) — the row's ``predictor``
label remains the full spec identity, so it stays aligned with ``v_weights``. For
every other ``v_method`` the values are the full-pre-period predictor aggregates.
gap_path : dict
Mapping ``{period: gap}`` for ALL periods (pre periods carry the fit
residual used for ``pre_rmspe``; post periods carry the effect path).
pre_rmspe : float
Root mean squared prediction error over the pre-treatment periods (the
primary fit diagnostic).
mspe_v : float, optional
The outer-objective value of the selected ``V``: the **pre-period** outcome
MSPE of ``W*(V*)`` under ``v_method="nested"``, or the held-out
**validation-window** outcome MSPE under ``v_method="cv"`` (the CV selection
criterion). None when there is no outer search — the ``v_method="custom"``
and ``"inverse_variance"`` paths and the degenerate single-donor path. Not
comparable across ``v_method`` values (different objective windows).
treated_unit : Any
The treated unit's identifier.
pre_periods, post_periods : list
Calendar-sorted pre / post period values.
v_method : str
``"nested"`` (data-driven V), ``"custom"`` (user-supplied V), ``"cv"``
(out-of-sample cross-validation V), or ``"inverse_variance"`` (closed-form
``1/Var(X)`` V).
v_cv_t0 : int, optional
The training/validation split index actually used under ``v_method="cv"``
(the resolved value — equals ``n_pre_periods // 2`` when the constructor's
``v_cv_t0`` was None). None for every other ``v_method``. Survives pickling.
standardize : str
``"std"`` (per-row SD scaling) or ``"none"``.
alpha : float
Significance level recorded for downstream (placebo) inference.
rmspe_ratio : float
The treated unit's post/pre RMSPE ratio = ``sqrt(MSPE_post / MSPE_pre)`` —
the in-space placebo test statistic (ADH 2010 §2.4), computed at fit time.
placebo_p_value : float
In-space placebo permutation p-value (``rank / (n_placebos + 1)``), NaN
until :meth:`in_space_placebo` is run. SEPARATE from the (always-NaN)
analytical ``p_value``; ``is_significant`` stays bound to ``p_value``.
n_placebos, n_failed : int
Donor placebos that entered the permutation reference set / were excluded
for non-convergence. Both 0 until :meth:`in_space_placebo` is run.
survey_metadata : Any, optional
Reserved; always None in this release.
Significance for classic SCM comes from :meth:`in_space_placebo` (opt-in
in-space placebo permutation inference); :meth:`get_placebo_df` returns the
per-unit RMSPE-ratio table used for the rank.
"""
att: float
se: float
t_stat: float
p_value: float
conf_int: Tuple[float, float]
n_obs: int
n_donors: int
n_pre_periods: int
n_post_periods: int
donor_weights: Dict[Any, float]
v_weights: Dict[str, float]
predictor_balance: pd.DataFrame
gap_path: Dict[Any, float]
pre_rmspe: float
treated_unit: Any
pre_periods: List[Any]
post_periods: List[Any]
v_method: str
standardize: str
alpha: float = 0.05
mspe_v: Optional[float] = None
v_cv_t0: Optional[int] = None
survey_metadata: Optional[Any] = field(default=None)
# In-space placebo permutation inference (Abadie-Diamond-Hainmueller 2010
# Section 2.4), populated by ``in_space_placebo()``. ``rmspe_ratio`` (the
# treated unit's post/pre RMSPE ratio) is computed at fit time; the rest stay
# at their no-inference defaults until a placebo run. NOTE: the permutation
# ``placebo_p_value`` is deliberately SEPARATE from ``p_value`` (which stays
# NaN) — it is not an analytical p-value, has no SE / t-stat, and does not
# flow through ``safe_inference``. ``is_significant`` likewise stays bound to
# the (NaN) ``p_value``, NOT ``placebo_p_value``.
placebo_p_value: float = np.nan
rmspe_ratio: float = np.nan
n_placebos: int = 0
n_failed: int = 0
def __post_init__(self) -> None:
    """Create the private, non-field state used by the opt-in diagnostics.

    These attributes are plain instance attributes rather than dataclass
    fields so that ``dataclasses.fields()`` / ``dataclasses.asdict()`` cannot
    reach the retained panel state. Every attribute starts at its "not run"
    default; ``fit()``, ``in_space_placebo()``, ``leave_one_out()`` and
    ``in_time_placebo()`` fill them in later.
    """
    # ---- In-space placebo state -------------------------------------------
    # ``_fit_snapshot`` (full panel) and ``_placebo_gaps`` (per-unit gap paths)
    # are panel-derived and nulled on pickle by ``__getstate__``; ``_placebo_df``
    # holds the small per-unit aggregate table returned by ``get_placebo_df()``.
    self._fit_snapshot: Optional[_SyntheticControlFitSnapshot] = None
    self._placebo_gaps: Optional[Dict[Any, Dict[Any, float]]] = None
    self._placebo_df: Optional[pd.DataFrame] = None
    # Whether the treated unit's own inner Frank-Wolfe weight solve converged.
    # in_space_placebo() fails closed when this is False: a truncated treated
    # fit makes the ranked statistic (rmspe_ratio) not a valid SCM optimum.
    self._fit_converged: bool = True
    # Explicit reason an in-space placebo run was infeasible/absent, set by
    # in_space_placebo(). summary() / _scm_native render THIS instead of
    # reconstructing the cause from counts — n_placebos/n_failed alone cannot
    # tell a non-converged treated fit ("treated_fit_nonconverged", n_failed=0)
    # apart from too few donors ("too_few_donors", also n_failed=0). Values:
    # None (not run), "ran", "treated_fit_nonconverged", "too_few_donors",
    # "all_placebos_failed". A small string, so it survives pickling.
    self._placebo_status: Optional[str] = None
    # ---- ADH 2015 §4 robustness diagnostics --------------------------------
    # Opt-in, populated by leave_one_out() / in_time_placebo(). Same
    # panel-vs-scalar split as the in-space placebo: the small per-row tables
    # (_loo_df / _in_time_df), scalar summaries and status strings survive
    # pickling; the per-refit gap-path dicts (_loo_gaps / _in_time_gaps) are
    # panel-derived and nulled by __getstate__. The analytical se/t/p/ci stay
    # NaN throughout.
    self._loo_df: Optional[pd.DataFrame] = None
    self._loo_gaps: Optional[Dict[Any, Dict[Any, float]]] = None
    # Reason a leave-one-out run was infeasible/absent. Values: None (not run),
    # "ran", "treated_fit_nonconverged", "too_few_donors", "all_refits_failed".
    self._loo_status: Optional[str] = None
    # (min, max) ATT across the successful leave-one-out refits (the absolute
    # spread of counterfactual ATTs); None until run.
    self._loo_att_range: Optional[Tuple[float, float]] = None
    # The headline single-donor-dependence number: max |att_loo - baseline_att|
    # over the successful drops. Baseline-RELATIVE, so a uniform shift of every
    # drop away from the baseline is NOT masked the way a narrow raw att_range
    # would be. None until run.
    self._loo_max_abs_delta_att: Optional[float] = None
    self._loo_n_failed: int = 0
    self._in_time_df: Optional[pd.DataFrame] = None
    self._in_time_gaps: Optional[Dict[Any, Dict[Any, float]]] = None
    # Reason an in-time placebo run was infeasible/absent. Values: None (not run),
    # "ran", "treated_fit_nonconverged", "too_few_pre_periods",
    # "all_dates_infeasible", "all_dates_failed", "all_dates_unusable" (a mix of
    # failed + infeasible dates with none usable).
    self._in_time_status: Optional[str] = None
    self._in_time_n_failed: int = 0
    # Number of placebo dates that were dimensionally infeasible (too few pre-fake
    # periods, all predictors dropped, or a zero-mass surviving custom_v). Surfaced
    # alongside _in_time_n_failed so a mixed no-success run reports an accurate mix.
    self._in_time_n_infeasible: int = 0
[docs]
def __getstate__(self) -> Dict[str, Any]:
    """Return the picklable state with panel-derived internals blanked out.

    ``_fit_snapshot`` keeps the full treated+donor panel, and the three
    gap-path dicts (``_placebo_gaps``, ``_loo_gaps``, ``_in_time_gaps``) keep
    per-unit / per-refit gap paths. All four are panel-derived — a privacy and
    size hazard if the pickle travels — so they are replaced by ``None`` in the
    returned mapping. Everything else in the instance ``__dict__`` is carried
    over unchanged: the public fields, the scalar placebo results
    (``placebo_p_value``, ``rmspe_ratio``, ``n_placebos``, ``n_failed``), the
    small ``_placebo_df`` / ``_loo_df`` / ``_in_time_df`` tables, and the
    status strings.

    After a round-trip, a diagnostic that needs the snapshot (for example
    ``in_space_placebo``) raises a ValueError asking the user to re-fit, and
    the overlay gap accessors raise as well. Mirrors ``SyntheticDiDResults``.

    Returns
    -------
    Dict[str, Any]
        A shallow copy of the instance dictionary; the live object is not
        modified.
    """
    panel_derived = ("_fit_snapshot", "_placebo_gaps", "_loo_gaps", "_in_time_gaps")
    state = dict(self.__dict__)
    # Force every panel-derived slot to None (adding the key if it is absent).
    state.update(dict.fromkeys(panel_derived, None))
    return state
[docs]
def __repr__(self) -> str:
    """Return a one-line summary: ATT, pre-period RMSPE, donor count, V method."""
    parts = [
        f"ATT={self.att:.4f}",
        f"pre_RMSPE={self.pre_rmspe:.4f}",
        f"n_donors={self.n_donors}",
        f"v_method={self.v_method!r}",
    ]
    return "SyntheticControlResults(" + ", ".join(parts) + ")"
@property
def coef_var(self) -> float:
    """Coefficient of variation, ``se / |att|``.

    Returns NaN unless ``se`` is finite and non-negative and ``att`` is finite
    and non-zero. Classic SCM always reports a NaN ``se``, so in practice this
    is NaN on this results class.
    """
    se = self.se
    # Short-circuit left to right: ``att`` is only inspected once ``se`` passes.
    if np.isfinite(se) and se >= 0 and np.isfinite(self.att) and self.att != 0:
        return se / abs(self.att)
    return np.nan
@property
def is_significant(self) -> bool:
    """Whether the analytical ``p_value`` is finite and below ``alpha``.

    Classic SCM produces no analytical p-value (``p_value`` is NaN), so this is
    False on this results class. It does not consult ``placebo_p_value``.
    """
    p = self.p_value
    if not np.isfinite(p):
        return False
    return bool(p < self.alpha)
@property
def significance_stars(self) -> str:
    """Star string for the analytical ``p_value``.

    Delegates to ``_get_significance_stars``. The docstring this replaces
    states the result is empty here because ``p_value`` is NaN.
    """
    stars = _get_significance_stars(self.p_value)
    return stars
[docs]
def summary(self, alpha: Optional[float] = None) -> str:
    """
    Generate a formatted, 75-column summary of the estimation results.

    The table has four parts: sample sizes, fit diagnostics, the (up to five)
    largest donor weights, and the ATT row. The analytical columns of the ATT
    row are always rendered as ``n/a``. A closing inference block then reports
    one of three states:

    * placebo never run — points the reader to ``in_space_placebo()``;
    * placebo run with a finite ``placebo_p_value`` — shows the RMSPE ratio,
      the permutation p-value and the size of the reference set;
    * placebo attempted but without a usable p-value — prints the specific
      reason recorded in ``_placebo_status``.

    Parameters
    ----------
    alpha : float, optional
        Accepted for call compatibility. No rendered line depends on a
        significance level (no confidence interval is printed), so the value
        does not affect the output.

    Returns
    -------
    str
        Formatted summary table, lines joined with ``"\\n"``.
    """
    # ``alpha`` is intentionally not resolved against ``self.alpha``: the old
    # ``alpha or self.alpha`` discarded a caller-supplied 0.0 and its result was
    # never read, so it was dead code.
    heavy = "=" * 75
    light = "-" * 75

    # Slicing already copes with fewer than five donors.
    top_donors = sorted(self.donor_weights.items(), key=lambda kv: kv[1], reverse=True)[:5]

    lines = [
        heavy,
        "Synthetic Control Method (SCM) Estimation Results".center(75),
        "Abadie, Diamond & Hainmueller (2010)".center(75),
        heavy,
        "",
        f"{'Observations:':<28} {self.n_obs:>10}",
        f"{'Donor units:':<28} {self.n_donors:>10}",
        f"{'Pre-treatment periods:':<28} {self.n_pre_periods:>10}",
        f"{'Post-treatment periods:':<28} {self.n_post_periods:>10}",
        f"{'Treated unit:':<28} {str(self.treated_unit):>10}",
        "",
        light,
        "Fit Diagnostics".center(75),
        light,
        f"{'Pre-treatment RMSPE:':<28} {self.pre_rmspe:>10.4f}",
        f"{'V selection:':<28} {self.v_method:>10}",
        f"{'Standardization:':<28} {self.standardize:>10}",
    ]

    if self.mspe_v is not None and np.isfinite(self.mspe_v):
        # Under cv, mspe_v is the held-out VALIDATION-window MSPE (the CV selection
        # criterion), not the pre-period objective minimized on the nested path.
        mspe_label = "Validation MSPE:" if self.v_method == "cv" else "Outer-objective MSPE:"
        lines.append(f"{mspe_label:<28} {self.mspe_v:>10.6f}")
    if self.v_method == "cv" and self.v_cv_t0 is not None:
        lines.append(f"{'CV train/val split (t0):':<28} {self.v_cv_t0:>10d}")

    if self.survey_metadata is not None:
        lines.extend(_format_survey_block(self.survey_metadata, 75))

    # Donor-weight section: one row per top donor, largest weight first.
    lines.extend(["", light, f"{'Top donor weights (w_j)':<40}", light])
    lines.extend(f"{'  ' + str(unit_id):<40} {w:>10.4f}" for unit_id, w in top_donors)

    # Parameter table: only the point estimate is defined for classic SCM.
    lines.extend(
        [
            "",
            light,
            f"{'Parameter':<15} {'Estimate':>12} {'Std. Err.':>12} "
            f"{'t-stat':>10} {'P>|t|':>10}",
            light,
            f"{'ATT (avg gap)':<15} {self.att:>12.4f} {'n/a':>12} " f"{'n/a':>10} {'n/a':>10}",
            light,
            "",
        ]
    )

    # ``_placebo_df is not None`` is the "attempted" signal (survives pickling).
    placebo_attempted = self._placebo_df is not None
    if placebo_attempted and np.isfinite(self.placebo_p_value):
        # The analytical fields above stay n/a (no SE); this is the permutation
        # p-value of the post/pre RMSPE ratio, p = rank/(n_placebos+1).
        excluded = f" ({self.n_failed} excluded)" if self.n_failed else ""
        lines.extend(
            [
                "In-space placebo permutation inference "
                "(Abadie-Diamond-Hainmueller 2010, Section 2.4):",
                f"{'  RMSPE ratio (post/pre):':<34} {self.rmspe_ratio:>10.4f}",
                f"{'  Permutation p-value:':<34} {self.placebo_p_value:>10.4f}",
                f"{'  Placebos in reference set:':<34} {self.n_placebos:>10d}" + excluded,
                "",
                "(Analytical SE is still undefined for classic SCM; the "
                "p-value above is permutation-based.)",
                heavy,
            ]
        )
    elif placebo_attempted:
        # Render the SPECIFIC reason recorded by in_space_placebo(); the count
        # fields (n_placebos=0, n_failed=0) cannot tell a non-converged treated
        # fit apart from too-few-donors, so do not reconstruct it from counts.
        status = getattr(self, "_placebo_status", None)
        if status == "treated_fit_nonconverged":
            reason = [
                "In-space placebo was skipped: the treated unit's own SCM fit "
                "did not converge at fit time (inner Frank-Wolfe weight solve",
                "and/or outer V search), so its RMSPE ratio is not a valid "
                "optimum to rank against placebos. placebo_p_value is undefined",
                "— re-fit with a larger inner_max_iter / looser "
                "inner_min_decrease and/or a larger optimizer_options['maxiter']",
                "/ more n_starts.",
            ]
        elif status == "too_few_donors":
            reason = [
                "In-space placebo inference requires at least 2 donors (each "
                "placebo is fit against the other donors); too few were",
                "available. placebo_p_value is undefined. Inspect " "get_placebo_df().",
            ]
        else:  # "all_placebos_failed" (or a legacy unpickle without the status)
            reason = [
                "In-space placebo permutation inference was attempted but "
                "produced no valid reference set",
                f"(0 placebos entered the rank; {self.n_failed} failed to "
                "converge). placebo_p_value is undefined — all donor refits",
                "failed. Inspect get_placebo_df().",
            ]
        lines.extend([*reason, heavy])
    else:
        lines.extend(
            [
                "Inference: classic SCM has no analytical standard error.",
                "Run in_space_placebo() for in-space permutation inference",
                "(Abadie-Diamond-Hainmueller 2010, Section 2.4).",
                heavy,
            ]
        )

    return "\n".join(lines)
[docs]
def print_summary(self, alpha: Optional[float] = None) -> None:
    """Write the text produced by ``summary(alpha)`` to stdout."""
    report = self.summary(alpha)
    print(report)
[docs]
def to_dict(self) -> Dict[str, Any]:
    """
    Collect the scalar results into a flat dictionary.

    The confidence interval is split into ``conf_int_lower`` /
    ``conf_int_upper``. The in-space placebo scalars are always included:
    ``rmspe_ratio`` is set at fit, while ``placebo_p_value`` / ``n_placebos`` /
    ``n_failed`` keep their no-inference defaults (NaN / 0) until
    ``in_space_placebo()`` runs. When ``survey_metadata`` is set, its
    ``weight_type``, ``effective_n`` and ``design_effect`` are appended.

    Returns
    -------
    Dict[str, Any]
        Dictionary of the scalar estimation results (weights/balance/gaps
        are available via the ``get_*_df`` accessors).
    """
    # Keys that map one-to-one onto attributes, in output order. The two
    # confidence-interval keys are spliced in between the two groups.
    leading = ("att", "se", "t_stat", "p_value")
    trailing = (
        "n_obs",
        "n_donors",
        "n_pre_periods",
        "n_post_periods",
        "pre_rmspe",
        "mspe_v",
        "treated_unit",
        "v_method",
        "v_cv_t0",
        "standardize",
        "rmspe_ratio",
        "placebo_p_value",
        "n_placebos",
        "n_failed",
    )

    out: Dict[str, Any] = {}
    for name in leading:
        out[name] = getattr(self, name)
    out["conf_int_lower"] = self.conf_int[0]
    out["conf_int_upper"] = self.conf_int[1]
    for name in trailing:
        out[name] = getattr(self, name)

    survey = self.survey_metadata
    if survey is not None:
        out["weight_type"] = survey.weight_type
        out["effective_n"] = survey.effective_n
        out["design_effect"] = survey.design_effect
    return out
[docs]
def to_dataframe(self) -> pd.DataFrame:
    """Return the ``to_dict()`` scalars as a one-row pandas DataFrame."""
    record = self.to_dict()
    return pd.DataFrame([record])
[docs]
def get_gap_df(self) -> pd.DataFrame:
    """
    Return the gap (effect) path as a DataFrame in calendar order.

    Rows follow the canonical ``pre_periods + post_periods`` sequence rather
    than the insertion order of ``gap_path``; periods missing from
    ``gap_path`` are skipped. A period is labelled ``"post"`` when it appears
    in ``post_periods`` and ``"pre"`` otherwise.

    Returns
    -------
    pandas.DataFrame
        Columns: ``period``, ``gap``, ``phase``.
    """
    gaps = self.gap_path
    calendar = [*self.pre_periods, *self.post_periods]
    records = [
        {
            "period": when,
            "gap": gaps[when],
            "phase": "post" if when in self.post_periods else "pre",
        }
        for when in calendar
        if when in gaps
    ]
    return pd.DataFrame(records, columns=["period", "gap", "phase"])
[docs]
def get_weights_df(self) -> pd.DataFrame:
    """
    Return ``donor_weights`` as a DataFrame, largest weight first.

    Returns
    -------
    pandas.DataFrame
        Columns: ``unit``, ``weight``. Empty (with both columns) when there
        are no donor weights.
    """
    columns = ["unit", "weight"]
    ranked = sorted(self.donor_weights.items(), key=lambda pair: pair[1], reverse=True)
    records = [dict(zip(columns, pair)) for pair in ranked]
    return pd.DataFrame(records, columns=columns)
# Column order shared by every in-space placebo table: both get_placebo_df()
# and in_space_placebo() pass this list as ``columns=`` when building the frame.
_PLACEBO_COLS = ["unit", "pre_mspe", "post_mspe", "rmspe_ratio", "is_treated", "status"]
[docs]
def get_placebo_df(self) -> pd.DataFrame:
    """
    Return the in-space placebo distribution, one row per unit.

    This is a per-unit SUMMARY table — enough to reproduce the permutation
    rank and a ratio-distribution plot. It does NOT carry the per-period
    placebo gap paths needed for the classic "spaghetti" plot; those are kept
    internally on ``_placebo_gaps`` for the successful placebos.

    Columns: ``unit``, ``pre_mspe``, ``post_mspe``, ``rmspe_ratio``,
    ``is_treated``, ``status`` (``"treated"`` / ``"placebo"`` / ``"failed"``).

    The treated unit is always present as a single ``is_treated=True,
    status="treated"`` row (its ratio comes from the original J-donor fit).
    After a placebo run **that produced a reference set** (``>= 2`` donors AND
    a converged treated fit) the table has ``n_donors + 1`` rows: every donor
    appears, including those whose refit did not converge
    (``status="failed"``, NaN metrics, excluded from the rank). In the
    degenerate / fail-closed cases (fewer than 2 donors, or a treated fit that
    did not converge) the placebo loop does not run and only the treated row
    is returned.

    The table is populated by :meth:`in_space_placebo` and survives pickling.
    Before any placebo run — including on an unpickled result that never ran
    one — a treated-only table is built on the fly from ``gap_path``.

    Returns
    -------
    pandas.DataFrame
        A copy; mutating it does not affect the stored table.
    """
    stored = self._placebo_df
    if stored is not None:
        return stored.copy()

    # No placebo run yet: synthesize the treated-only table.
    from diff_diff.synthetic_control import _mspe

    pre_mspe = _mspe(self.gap_path, self.pre_periods)
    post_mspe = _mspe(self.gap_path, self.post_periods)
    treated_row = {
        "unit": self.treated_unit,
        "pre_mspe": pre_mspe,
        "post_mspe": post_mspe,
        "rmspe_ratio": self.rmspe_ratio,
        "is_treated": True,
        "status": "treated",
    }
    return pd.DataFrame([treated_row], columns=self._PLACEBO_COLS)
[docs]
def in_space_placebo(
    self,
    n_starts: Optional[int] = None,
) -> pd.DataFrame:
    """
    In-space placebo permutation inference (Abadie-Diamond-Hainmueller 2010,
    Section 2.4).

    Each donor in turn is treated as the pseudo-treated unit, a synthetic
    control is re-estimated for it against the OTHER donors, and the real
    treated unit's post/pre RMSPE ratio is ranked among all units. The call
    stores ``placebo_p_value``, ``n_placebos`` and ``n_failed`` on this object
    (``rmspe_ratio`` — the treated unit's own ratio — is set at fit time) and
    returns the same table :meth:`get_placebo_df` returns afterwards.

    Donor pool of each placebo. The real treated unit is **never** part of a
    placebo's donor pool: its post-period outcome is treatment-contaminated,
    so letting a placebo load weight on it would bias the placebo gap. The
    ranking set is therefore the ``J+1`` units ``{treated} ∪ {J placebos}``,
    each placebo fit against the remaining ``J-1`` donors (the standard
    ``SCtools::generate.placebos`` construction). Because the post/pre RMSPE
    ratio normalizes by pre-treatment fit, the pre-fit-cutoff filtering of ADH
    Figures 5-7 (journal p. 502) is unnecessary; no pre-fit filter is offered
    and every converged placebo enters the rank.

    Relation to the analytical fields. ``placebo_p_value`` is deliberately
    separate from ``p_value`` (which stays NaN — classic SCM has no analytical
    SE) and from ``is_significant`` (which stays bound to that NaN
    ``p_value``).

    Excluded placebos. A placebo is left out of the reference set and counted
    in ``n_failed`` when its fit is not a valid optimum: EITHER the inner
    Frank-Wolfe weight solve did not converge (a truncated ``W`` is unusable)
    OR the outer ``V`` search did not converge (an under-optimized ``V`` fits
    the pre-period worse, shrinking the RMSPE ratio and biasing the
    permutation p-value anti-conservatively). Every placebo refit **inherits
    the original fit's ``optimizer_options`` / ``n_starts``**, so valid
    inference needs settings under which the outer ``V`` search converges:
    production defaults do; with cheap settings, raise ``n_starts`` here or
    re-fit with a larger ``optimizer_options['maxiter']`` (otherwise placebos
    are dropped as failed).

    Fail-closed cases (a ``UserWarning`` is emitted, ``placebo_p_value`` is
    NaN, and only the treated row is returned):

    * the treated unit's own fit did not converge (inner OR outer search) —
      it is held to the same standard as the placebos;
    * fewer than 2 donors are available.

    With exactly 2 donors the run proceeds but warns that the p-value is
    coarse (smallest attainable value 1/3).

    Parameters
    ----------
    n_starts : int, optional
        Override the multistart count for each placebo's outer V search (nested/cv).
        Default None inherits the original fit's ``n_starts``. The placebo
        loop is the cost driver (one outer V search per donor); lower it for a
        faster, coarser scan.

    Returns
    -------
    pandas.DataFrame
        The placebo distribution (see :meth:`get_placebo_df`).

    Raises
    ------
    ValueError
        If the fit snapshot is unavailable (e.g. this result was unpickled),
        or if ``n_starts`` is given and is not a positive integer.
    """
    snapshot = self._fit_snapshot
    if snapshot is None:
        raise ValueError(
            "in_space_placebo() requires the fit snapshot on the results "
            "object. This result appears to have been loaded from "
            "serialization (which excludes the snapshot) or produced by an "
            "older estimator version. Re-fit to enable in-space placebo "
            "inference."
        )

    from diff_diff.synthetic_control import _mspe, _placebo_fit_unit

    donor_pool = list(snapshot.donor_ids)
    pool_size = len(donor_pool)

    if n_starts is not None:
        # Mirror the estimator constructor's validation (synthetic_control.py)
        # so a bad override fails fast instead of silently coercing (e.g. via
        # int(0)/int(-1)) into a degenerate or invalid permutation procedure.
        if not isinstance(n_starts, (int, np.integer)) or n_starts < 1:
            raise ValueError(f"n_starts override must be a positive integer, got {n_starts!r}")
        starts = int(n_starts)
    else:
        starts = snapshot.n_starts

    def _unit_row(unit: Any, pre: float, post: float, ratio: float, status: str) -> Dict[str, Any]:
        # One record of the placebo table; only the "treated" row is flagged.
        return {
            "unit": unit,
            "pre_mspe": pre,
            "post_mspe": post,
            "rmspe_ratio": ratio,
            "is_treated": status == "treated",
            "status": status,
        }

    own_pre = _mspe(self.gap_path, snapshot.pre_periods)
    own_post = _mspe(self.gap_path, snapshot.post_periods)
    own_ratio = self.rmspe_ratio
    table: List[Dict[str, Any]] = [
        _unit_row(snapshot.treated_id, own_pre, own_post, own_ratio, "treated")
    ]

    def _commit(p: float, n_ok: int, n_bad: int, gaps: Dict[Any, Dict[Any, float]], status: str) -> pd.DataFrame:
        # Persist the outcome of this run on the results object and hand back
        # a defensive copy of the stored table.
        self.placebo_p_value = p
        self.n_placebos = n_ok
        self.n_failed = n_bad
        self._placebo_gaps = gaps
        self._placebo_status = status
        self._placebo_df = pd.DataFrame(table, columns=self._PLACEBO_COLS)
        return self._placebo_df.copy()

    # Fail closed when the treated unit's OWN fit did not converge at fit time
    # (inner Frank-Wolfe weight solve OR outer V search): ranking a statistic
    # from a truncated / under-optimized treated fit would not be a valid ADH
    # 2010 §2.4 permutation. ``_fit_converged`` folds both failure modes, so
    # the remediation names the knobs for each. Warnings are raised here, not
    # in a helper, so ``stacklevel=2`` keeps pointing at the caller.
    if not self._fit_converged:
        warnings.warn(
            "In-space placebo skipped: the treated unit's own SCM fit did not "
            "converge at fit time (inner Frank-Wolfe weight solve and/or outer V "
            "search), so its RMSPE ratio is not a valid optimum to rank against "
            "placebos. placebo_p_value is NaN — re-fit with a larger "
            "inner_max_iter / looser inner_min_decrease (inner) and/or a larger "
            "optimizer_options['maxiter'] / more n_starts (outer V search).",
            UserWarning,
            stacklevel=2,
        )
        return _commit(np.nan, 0, 0, {}, "treated_fit_nonconverged")

    if pool_size < 2:
        warnings.warn(
            "In-space placebo inference requires at least 2 donors (each "
            f"placebo is fit against the other donors); only {pool_size} "
            "available. placebo_p_value is NaN.",
            UserWarning,
            stacklevel=2,
        )
        return _commit(np.nan, 0, 0, {}, "too_few_donors")

    if pool_size == 2:
        warnings.warn(
            "In-space placebo with 2 donors: each placebo is fit against a "
            "single donor (degenerate weight w=[1]) with no V search, so the "
            "permutation p-value is coarse (only 2 placebos enter the "
            "reference set; the smallest attainable p-value is 1/3).",
            UserWarning,
            stacklevel=2,
        )

    gaps_by_unit: Dict[Any, Dict[Any, float]] = {}
    reference: List[float] = []
    failures = 0
    for candidate in donor_pool:
        others = [d for d in donor_pool if d != candidate]
        outcome = _placebo_fit_unit(snapshot, candidate, others, starts)
        if outcome is None:
            # No valid optimum for this donor: keep it out of BOTH the
            # numerator and the denominator of the rank, but still record it
            # (NaN metrics) so the table lists the treated unit plus every donor.
            failures += 1
            table.append(_unit_row(candidate, np.nan, np.nan, np.nan, "failed"))
            continue
        path, ratio = outcome
        gaps_by_unit[candidate] = path
        cand_pre = _mspe(path, snapshot.pre_periods)
        cand_post = _mspe(path, snapshot.post_periods)
        reference.append(ratio)
        table.append(_unit_row(candidate, cand_pre, cand_post, ratio, "placebo"))

    n_ok = len(reference)
    if n_ok > 0:
        # Upper-tail rank on the (unsigned) RMSPE ratio, treated unit included
        # as the "+1". Ties counted via ``>=`` so the p-value is conservative.
        # (The ratio squares the gaps -> direction-agnostic, NOT a signed test.)
        at_least_as_extreme = len([r for r in reference if r >= own_ratio])
        p_value = (1 + at_least_as_extreme) / (n_ok + 1)
    else:
        warnings.warn(
            "No in-space placebo entered the reference set (all donors "
            f"failed to converge or were filtered out of {pool_size}); "
            "placebo_p_value is NaN.",
            UserWarning,
            stacklevel=2,
        )
        p_value = np.nan

    if failures > 0:
        if snapshot.v_method == "cv":
            cv_note = (
                " Under v_method='cv' an excluded refit may instead be STRUCTURALLY "
                "infeasible (the pseudo-treated unit's donor pool is indistinguishable in a "
                "re-aggregated CV window) — remedied by adjusting the predictors, v_cv_t0, "
                "or the donor pool, NOT inner_max_iter / n_starts."
            )
        else:
            cv_note = ""
        warnings.warn(
            f"{failures} of {pool_size} in-space placebos were excluded from the "
            "permutation distribution (the refit did not reach a valid optimum — a "
            "non-converged inner weight solve or outer V search); "
            f"placebo_p_value uses the remaining {n_ok}.{cv_note}",
            UserWarning,
            stacklevel=2,
        )

    return _commit(
        float(p_value),
        int(n_ok),
        int(failures),
        gaps_by_unit,
        "ran" if n_ok > 0 else "all_placebos_failed",
    )
# Column order of the leave-one-out table; matches the columns listed in the
# ``leave_one_out()`` docstring.
# NOTE(review): presumably passed as ``columns=`` the same way _PLACEBO_COLS is —
# the code that uses it is outside this view; confirm.
_LOO_COLS = [
    "dropped_unit",
    "att",
    "pre_rmspe",
    "post_rmspe",
    "rmspe_ratio",
    "delta_att",
    "status",
]
[docs]
def leave_one_out(self, n_starts: Optional[int] = None) -> pd.DataFrame:
    """
    Leave-one-out donor robustness (Abadie-Diamond-Hainmueller 2015, Section 4).

    Drops each **reportably-weighted** donor, one at a time, and re-fits the
    treated unit's synthetic control against the remaining donor pool. The
    per-drop ATTs reveal whether the estimated effect is driven by any single
    donor (ADH 2015 overlay the leave-one-out counterfactual trajectories for
    this purpose; :meth:`get_leave_one_out_gaps` returns those paths). This is a
    thin re-run of the SCM solver — it has **no analytical standard error**;
    ``se``/``t_stat``/``p_value``/``conf_int`` and ``is_significant`` are not
    touched by this method.

    The drop set is the fit snapshot's ``weighted_donor_ids`` — the donors above
    the ``1e-6`` interpretability floor (``synthetic_control._MIN_REPORT_WEIGHT``),
    frozen at fit time. A donor with negligible weight ``0 < w ≤ 1e-6`` is
    excluded (its removal moves the ATT by ~the weight, so its ``delta_att``
    would be ~0 — an uninformative row), keeping the LOO table aligned with the
    reported support; a zero-weight donor's removal leaves the synthetic
    unchanged. (This `1e-6` approximation of "positive weight" is documented in
    REGISTRY §SyntheticControl.)

    A donor that carries ALL the weight is still dropped (the others absorb its
    mass on re-fit); its large ``delta_att`` is exactly the single-donor-dependence
    signal this diagnostic exists to surface, NOT a failure.

    Parameters
    ----------
    n_starts : int, optional
        Override the multistart count for each leave-one-out refit's outer V
        search (nested/cv). Default None inherits the original fit's ``n_starts``.

    Returns
    -------
    pandas.DataFrame
        One ``status="baseline"`` row (the full fit, ``delta_att=0``) followed by
        one row per dropped donor (``status="loo"``, or ``"failed"`` with NaN
        metrics when its refit returned no result), sorted by ``|delta_att|``
        descending (failed rows last). Columns: ``dropped_unit``, ``att``,
        ``pre_rmspe``, ``post_rmspe``, ``rmspe_ratio``, ``delta_att``
        (``att_loo - full_att``), ``status``. When the treated fit did not
        converge, or fewer than 2 donors exist, only the baseline row is
        returned (with a ``UserWarning``).

    Raises
    ------
    ValueError
        If the fit snapshot is unavailable (e.g. this result was unpickled), or
        ``n_starts`` is not a positive integer.

    Notes
    -----
    Every call overwrites the cached leave-one-out state: ``_loo_df``,
    ``_loo_gaps``, ``_loo_status``, ``_loo_n_failed``, ``_loo_att_range`` and
    ``_loo_max_abs_delta_att``. The baseline-only exits reset ALL of these, so
    no value from an earlier run can be read alongside a skipped status.
    """
    if self._fit_snapshot is None:
        raise ValueError(
            "leave_one_out() requires the fit snapshot on the results object. "
            "This result appears to have been loaded from serialization (which "
            "excludes the snapshot) or produced by an older estimator version. "
            "Re-fit to enable leave-one-out donor robustness."
        )

    # Imported lazily: synthetic_control.py imports this module, so a
    # module-level import would be circular.
    from diff_diff.synthetic_control import _mspe, _placebo_fit_unit

    snap = self._fit_snapshot
    if n_starts is None:
        n_starts_eff = snap.n_starts
    elif not isinstance(n_starts, (int, np.integer)) or n_starts < 1:
        # Fail fast on a bad override instead of coercing it into a degenerate
        # refit (same check as in_time_placebo()).
        raise ValueError(f"n_starts override must be a positive integer, got {n_starts!r}")
    else:
        n_starts_eff = int(n_starts)

    # Baseline row: read DIRECTLY from the full fit (no re-fit), so the reference
    # ATT — and therefore delta_att=0.0 — is exact.
    full_att = float(self.att)
    baseline_row = {
        "dropped_unit": None,
        "att": full_att,
        "pre_rmspe": float(self.pre_rmspe),
        "post_rmspe": float(np.sqrt(_mspe(self.gap_path, snap.post_periods))),
        "rmspe_ratio": float(self.rmspe_ratio),
        "delta_att": 0.0,
        "status": "baseline",
    }

    def _store_baseline_only(status: str) -> pd.DataFrame:
        # Shared early-exit bookkeeping: cache a baseline-only table and reset
        # every derived LOO field (including the max-|delta| headline).
        self._loo_status = status
        self._loo_att_range = None
        self._loo_max_abs_delta_att = None
        self._loo_n_failed = 0
        self._loo_gaps = {}
        self._loo_df = pd.DataFrame([baseline_row], columns=self._LOO_COLS)
        return self._loo_df.copy()

    # Fail closed when the treated unit's own fit did not converge: a truncated /
    # under-optimized baseline ATT makes every leave-one-out delta meaningless.
    # (Warnings are raised here, not in the helper, so stacklevel=2 still points
    # at the caller of leave_one_out().)
    if not self._fit_converged:
        warnings.warn(
            "Leave-one-out skipped: the treated unit's own SCM fit did not "
            "converge at fit time (inner Frank-Wolfe weight solve and/or outer V "
            "search), so the baseline ATT is not a valid optimum to compare "
            "leave-one-out refits against. Re-fit with a larger inner_max_iter / "
            "looser inner_min_decrease (inner) and/or a larger "
            "optimizer_options['maxiter'] / more n_starts (outer V search).",
            UserWarning,
            stacklevel=2,
        )
        return _store_baseline_only("treated_fit_nonconverged")

    # Dropping any donor requires at least one donor left in the pool.
    if len(snap.donor_ids) < 2:
        warnings.warn(
            "Leave-one-out donor robustness requires at least 2 donors (dropping "
            f"one must leave a non-empty pool); only {len(snap.donor_ids)} "
            "available. Returning the baseline fit only.",
            UserWarning,
            stacklevel=2,
        )
        return _store_baseline_only("too_few_donors")

    # Drop the FROZEN reportably-weighted support captured at fit time. Reading
    # the snapshot — NOT the mutable presentation-level self.donor_weights —
    # keeps the result immune to post-fit mutation of donor_weights.
    weighted_donors = list(snap.weighted_donor_ids)
    loo_gaps: Dict[Any, Dict[Any, float]] = {}
    converged_rows: List[Dict[str, Any]] = []
    failed_rows: List[Dict[str, Any]] = []
    atts: List[float] = []
    for dropped in weighted_donors:
        pool = [unit for unit in snap.donor_ids if unit != dropped]
        fitted = _placebo_fit_unit(snap, snap.treated_id, pool, n_starts_eff)
        if fitted is None:
            # No usable refit for this drop: keep the row, with NaN metrics.
            failed_rows.append(
                {
                    "dropped_unit": dropped,
                    "att": np.nan,
                    "pre_rmspe": np.nan,
                    "post_rmspe": np.nan,
                    "rmspe_ratio": np.nan,
                    "delta_att": np.nan,
                    "status": "failed",
                }
            )
            continue
        gap_path_d, ratio_d = fitted
        loo_gaps[dropped] = gap_path_d
        att_d = float(np.mean([gap_path_d[p] for p in snap.post_periods]))
        atts.append(att_d)
        converged_rows.append(
            {
                "dropped_unit": dropped,
                "att": att_d,
                "pre_rmspe": float(np.sqrt(_mspe(gap_path_d, snap.pre_periods))),
                "post_rmspe": float(np.sqrt(_mspe(gap_path_d, snap.post_periods))),
                "rmspe_ratio": ratio_d,
                "delta_att": att_d - full_att,
                "status": "loo",
            }
        )

    # Most influential donor first; sorted() is stable, so ties keep donor order.
    # Failed drops go last, in donor order.
    converged_rows = sorted(
        converged_rows,
        key=lambda row: abs(row["delta_att"]),
        reverse=True,
    )
    ordered = [baseline_row] + converged_rows + failed_rows
    n_failed = len(failed_rows)

    if n_failed > 0:
        cv_note = (
            " Under v_method='cv' a 'failed' drop may instead be STRUCTURALLY "
            "infeasible (the reduced donor pool is indistinguishable in a re-aggregated "
            "CV window) — remedied by adjusting the predictors, v_cv_t0, or the donor "
            "pool, NOT inner_max_iter / n_starts."
            if snap.v_method == "cv"
            else ""
        )
        warnings.warn(
            f"{n_failed} of {len(weighted_donors)} leave-one-out refits were excluded with "
            "NaN metrics (status='failed'; the refit did not reach a valid optimum — a "
            "non-converged inner weight solve or outer V search); the ATT range uses "
            f"the remaining refits.{cv_note}",
            UserWarning,
            stacklevel=2,
        )

    self._loo_gaps = loo_gaps
    self._loo_n_failed = int(n_failed)
    self._loo_att_range = (min(atts), max(atts)) if atts else None
    # Baseline-relative headline: the largest swing of any single donor-drop from
    # the full-fit ATT (max |delta_att|). Robust to a uniform shift that a raw
    # att_range would understate.
    self._loo_max_abs_delta_att = max(abs(a - full_att) for a in atts) if atts else None
    # "all_refits_failed" = donors were dropped but none produced an estimate;
    # an empty drop set falls through to "ran" (baseline-only, benign).
    self._loo_status = "all_refits_failed" if (weighted_donors and not atts) else "ran"
    self._loo_df = pd.DataFrame(ordered, columns=self._LOO_COLS)
    return self._loo_df.copy()
[docs]
def get_leave_one_out_df(self) -> pd.DataFrame:
    """
    Return the cached leave-one-out donor-robustness table.

    The table is the one built by the most recent :meth:`leave_one_out` call.
    It is documented to survive pickling (unlike the per-period gap paths).

    Returns
    -------
    pandas.DataFrame
        A copy of the cached table, so callers cannot mutate the stored one.

    Raises
    ------
    ValueError
        If :meth:`leave_one_out` has not been run yet.
    """
    cached = self._loo_df
    if cached is not None:
        return cached.copy()
    raise ValueError("No leave-one-out results yet; call leave_one_out() first.")
[docs]
def get_leave_one_out_gaps(self) -> pd.DataFrame:
    """
    Long-form leave-one-out gap paths, for the overlay ("spaghetti") plot.

    One row per (dropped donor, period) for every leave-one-out refit whose gap
    path was stored by :meth:`leave_one_out`. Periods are emitted in
    ``pre_periods`` then ``post_periods`` order; a period missing from a donor's
    gap path is skipped. Columns: ``dropped_unit``, ``period``, ``gap``,
    ``phase`` (``"pre"``/``"post"``). These per-period paths are panel-derived
    and are NOT retained after pickling.

    Returns
    -------
    pandas.DataFrame
        Empty (but with the four columns) when no refit produced a gap path.

    Raises
    ------
    ValueError
        If :meth:`leave_one_out` has not been run, or if the gap paths were
        dropped on pickling (re-fit and re-run to recompute them).
    """
    if self._loo_df is None:
        raise ValueError("No leave-one-out results yet; call leave_one_out() first.")
    if self._loo_gaps is None:
        raise ValueError(
            "Leave-one-out gap paths are not retained after pickling "
            "(panel-derived); re-run leave_one_out() on a freshly fitted result "
            "to recompute them."
        )

    # The timeline and its phase labels do not depend on the donor, so build
    # them once; a set makes the post-period membership test O(1) per period.
    post_lookup = set(self.post_periods)
    timeline = [
        (period, "post" if period in post_lookup else "pre")
        for period in list(self.pre_periods) + list(self.post_periods)
    ]

    rows: List[Dict[str, Any]] = []
    for unit, gap_path in self._loo_gaps.items():
        rows.extend(
            {
                "dropped_unit": unit,
                "period": period,
                "gap": gap_path[period],
                "phase": phase,
            }
            for period, phase in timeline
            if period in gap_path
        )
    return pd.DataFrame(rows, columns=["dropped_unit", "period", "gap", "phase"])
# Column order of the in-time placebo table. ``in_time_placebo`` passes this list
# as ``columns=`` both for the empty frame it caches on an early exit and for the
# one-row-per-placebo-date frame it caches after a run, so every returned frame
# has exactly these columns in this order.
_IN_TIME_COLS = [
"placebo_period",
"placebo_att",
"pre_fit_rmspe",
"rmspe_ratio",
"n_pre_fake",
"n_post_fake",
"n_dropped_specs",
"status",
]
[docs]
def in_time_placebo(
    self,
    placebo_periods: Optional[Any] = None,
    n_starts: Optional[int] = None,
) -> pd.DataFrame:
    """
    In-time (backdating) placebo (Abadie-Diamond-Hainmueller 2015, Section 4).

    Reassigns the intervention to an earlier pre-treatment date ``t_f`` and re-fits
    the synthetic control using ONLY pre-``t_f`` information, then measures the
    "effect" over the held-out window ``[t_f, T0)``. A credible synthetic control
    should show **no spurious gap** there (ADH 2015 Figure 4, German reunification
    backdated to 1975). This is a thin re-run of the SCM solver — it has
    **no analytical standard error**; ``se``/``t_stat``/``p_value``/``conf_int`` and
    ``is_significant`` are not touched by this method.

    **Windowing convention (TRUNCATE).** The placebo fit uses only periods strictly
    before ``t_f``: pre-period-outcome predictors become the pre-``t_f`` outcomes,
    and covariate / special predictor windows are intersected with the pre-``t_f``
    window. A predictor window lying ENTIRELY in the held-out region ``[t_f, T0)``
    is dropped (surfaced in ``n_dropped_specs`` + an aggregated warning). For
    outcome-predictor fits this equals the literal "lag the predictors" re-run of a
    manual ``Synth::synth`` (R has no in-time-placebo function); see
    ``docs/methodology/REGISTRY.md`` for the recognized deviation note. The
    truncation itself is delegated to
    ``synthetic_control._truncate_snapshot_in_time``.

    Parameters
    ----------
    placebo_periods : period value or list of period values, optional
        The pseudo-intervention date(s), each a member of ``pre_periods``. A
        ``list``/``tuple``/``set``/``numpy.ndarray``/``pandas.Index``/
        ``pandas.Series`` is treated as a collection of dates; anything else as a
        single date. Duplicates are removed and dates are processed in
        pre-period order. Default None sweeps every pre-date from the third
        pre-period onward (at least 2 pre-fake periods to fit + at least 1
        post-fake period to measure the gap). A date that is a true
        post-treatment period, or not a pre-period at all, raises ``ValueError``;
        a valid pre-date for which the truncated snapshot cannot be built yields
        a ``status="infeasible"`` row (no raise).
    n_starts : int, optional
        Override the multistart count for each placebo refit's outer V search
        (nested/cv). Default None inherits the original fit's ``n_starts``.

    Returns
    -------
    pandas.DataFrame
        One row per placebo date. Columns: ``placebo_period``, ``placebo_att`` (mean
        gap over the held-out window — should be ~0 if no real pre-period effect),
        ``pre_fit_rmspe``, ``rmspe_ratio`` (post-fake/pre-fake), ``n_pre_fake``,
        ``n_post_fake``, ``n_dropped_specs``, ``status`` (``"ran"`` / ``"infeasible"``
        / ``"failed"``). An EMPTY frame (with a ``UserWarning``) is returned when
        the treated fit did not converge or fewer than 3 pre-periods exist; both
        conditions are checked before ``placebo_periods`` is validated.

    Raises
    ------
    ValueError
        If the fit snapshot is unavailable (e.g. this result was unpickled),
        ``n_starts`` is not a positive integer, ``placebo_periods`` is an empty
        collection, or an explicit ``placebo_periods`` entry is a post-treatment
        period / not a pre-period.

    Notes
    -----
    Every call overwrites the cached in-time state: ``_in_time_df``,
    ``_in_time_gaps``, ``_in_time_status``, ``_in_time_n_failed`` and
    ``_in_time_n_infeasible``. The skipped (empty-frame) exits reset ALL of
    these, so no counter from an earlier run can be read alongside a skipped
    status.
    """
    if self._fit_snapshot is None:
        raise ValueError(
            "in_time_placebo() requires the fit snapshot on the results object. "
            "This result appears to have been loaded from serialization (which "
            "excludes the snapshot) or produced by an older estimator version. "
            "Re-fit to enable the in-time placebo."
        )

    # Imported lazily: synthetic_control.py imports this module, so a
    # module-level import would be circular.
    from diff_diff.synthetic_control import (
        _mspe,
        _placebo_fit_unit,
        _truncate_snapshot_in_time,
    )

    snap = self._fit_snapshot
    if n_starts is None:
        n_starts_eff = snap.n_starts
    elif not isinstance(n_starts, (int, np.integer)) or n_starts < 1:
        raise ValueError(f"n_starts override must be a positive integer, got {n_starts!r}")
    else:
        n_starts_eff = int(n_starts)

    pre = list(snap.pre_periods)

    def _store_skipped(status: str) -> pd.DataFrame:
        # Shared early-exit bookkeeping: cache an empty table and reset every
        # derived in-time field (both the failed and the infeasible counters).
        empty = pd.DataFrame([], columns=self._IN_TIME_COLS)
        self._in_time_status = status
        self._in_time_n_failed = 0
        self._in_time_n_infeasible = 0
        self._in_time_gaps = {}
        self._in_time_df = empty
        return empty.copy()

    # Fail closed when the treated unit's own fit did not converge: a truncated /
    # under-optimized baseline makes the placebo comparison meaningless.
    # (Warnings are raised here, not in the helper, so stacklevel=2 still points
    # at the caller of in_time_placebo().)
    if not self._fit_converged:
        warnings.warn(
            "In-time placebo skipped: the treated unit's own SCM fit did not "
            "converge at fit time (inner Frank-Wolfe weight solve and/or outer V "
            "search). Re-fit with a larger inner_max_iter / looser "
            "inner_min_decrease (inner) and/or a larger optimizer_options['maxiter'] "
            "/ more n_starts (outer V search).",
            UserWarning,
            stacklevel=2,
        )
        return _store_skipped("treated_fit_nonconverged")

    # A feasible date needs >=2 pre-fake + >=1 post-fake period -> >=3 pre periods.
    # The >=2 pre-fake rule is a deliberate restriction (an auto-swept
    # single-pre-fake placebo is a non-credible pre-fit; see REGISTRY).
    if len(pre) < 3:
        warnings.warn(
            "In-time placebo requires at least 3 pre-treatment periods (a feasible "
            "placebo date needs >=2 pre-fake periods to fit and >=1 post-fake period "
            f"to measure the gap); only {len(pre)} available.",
            UserWarning,
            stacklevel=2,
        )
        return _store_skipped("too_few_pre_periods")

    dates: List[Any]
    if placebo_periods is None:
        # Sweep every feasible pre-date: position >= 2 leaves >=2 pre-fake and
        # >=1 post-fake period.
        dates = pre[2:]
    else:
        if isinstance(placebo_periods, (list, tuple, set, np.ndarray, pd.Index, pd.Series)):
            dates = list(placebo_periods)
        else:
            dates = [placebo_periods]
        # An explicit but EMPTY container is a malformed request (NOT "every date
        # was infeasible") — fail fast. Pass None to sweep all feasible pre-dates.
        if not dates:
            raise ValueError(
                "placebo_periods is empty; pass None to sweep all feasible "
                "pre-dates, or a non-empty list of pre-period date(s)."
            )
        pre_set = set(pre)
        post_set = set(snap.post_periods)
        for candidate in dates:
            if candidate in post_set:
                raise ValueError(
                    f"placebo_period {candidate!r} is a true post-treatment period; an "
                    "in-time placebo date must lie in the pre-treatment window."
                )
            if candidate not in pre_set:
                raise ValueError(
                    f"placebo_period {candidate!r} is not a pre-treatment period "
                    f"(pre_periods = {pre})."
                )
        # De-duplicate + canonicalize to pre-period order so duplicate / unordered
        # explicit dates never trigger duplicate refits.
        requested = set(dates)
        dates = [p for p in pre if p in requested]

    in_time_gaps: Dict[Any, Dict[Any, float]] = {}
    rows: List[Dict[str, Any]] = []
    dropped_all: set = set()
    n_failed = 0
    n_infeasible = 0
    n_ran = 0
    for t_f in dates:
        # Position of t_f in the pre-window = number of periods strictly before
        # it; the held-out window runs from t_f to the end of the pre-window.
        n_pre_fake = pre.index(t_f)
        n_post_fake = len(pre) - n_pre_fake
        snap_mod, dropped = _truncate_snapshot_in_time(snap, t_f)
        dropped_all.update(dropped)

        # Start from the NaN-metric template shared by the infeasible/failed
        # outcomes; a successful refit fills the metrics in below.
        row: Dict[str, Any] = {
            "placebo_period": t_f,
            "placebo_att": np.nan,
            "pre_fit_rmspe": np.nan,
            "rmspe_ratio": np.nan,
            "n_pre_fake": n_pre_fake,
            "n_post_fake": n_post_fake,
            "n_dropped_specs": len(dropped),
        }
        if snap_mod is None:
            n_infeasible += 1
            row["status"] = "infeasible"
        else:
            fitted = _placebo_fit_unit(snap_mod, snap.treated_id, snap.donor_ids, n_starts_eff)
            if fitted is None:
                n_failed += 1
                row["status"] = "failed"
            else:
                gap_path, ratio = fitted
                in_time_gaps[t_f] = gap_path
                row["placebo_att"] = float(
                    np.mean([gap_path[p] for p in snap_mod.post_periods])
                )
                row["pre_fit_rmspe"] = float(np.sqrt(_mspe(gap_path, snap_mod.pre_periods)))
                row["rmspe_ratio"] = ratio
                row["status"] = "ran"
                n_ran += 1
        rows.append(row)

    if dropped_all:
        warnings.warn(
            "In-time placebo (TRUNCATE convention): predictor(s) "
            f"{sorted(map(str, dropped_all))} fell entirely in the held-out "
            "post-fake window for some placebo date(s) and were dropped from those "
            "refits (see the n_dropped_specs column).",
            UserWarning,
            stacklevel=2,
        )
    if n_infeasible > 0:
        warnings.warn(
            f"{n_infeasible} in-time placebo date(s) were structurally infeasible "
            "(too few pre-fake periods, all predictors dropped, or — under "
            "v_method='cv' — a kept predictor no longer spans both windows, or a "
            "re-aggregated window loses cross-donor variation, after truncation) and "
            "are reported with status='infeasible' (NaN metrics).",
            UserWarning,
            stacklevel=2,
        )
    if n_failed > 0:
        warnings.warn(
            f"{n_failed} in-time placebo refit(s) failed to converge and are "
            "reported with status='failed' (NaN metrics).",
            UserWarning,
            stacklevel=2,
        )

    self._in_time_gaps = in_time_gaps
    self._in_time_n_failed = int(n_failed)
    self._in_time_n_infeasible = int(n_infeasible)
    # When no date ran, classify the cause precisely: pure convergence failure
    # ("all_dates_failed"), pure infeasibility ("all_dates_infeasible"), or a
    # mix of both ("all_dates_unusable"; both counters are stored above).
    if n_ran > 0:
        self._in_time_status = "ran"
    elif n_failed > 0 and n_infeasible > 0:
        self._in_time_status = "all_dates_unusable"
    elif n_failed > 0:
        self._in_time_status = "all_dates_failed"
    else:
        self._in_time_status = "all_dates_infeasible"
    self._in_time_df = pd.DataFrame(rows, columns=self._IN_TIME_COLS)
    return self._in_time_df.copy()
[docs]
def get_in_time_placebo_df(self) -> pd.DataFrame:
    """
    Return the cached in-time placebo table.

    The table is the one built by the most recent :meth:`in_time_placebo` call.
    It is documented to survive pickling (unlike the per-period gap paths).

    Returns
    -------
    pandas.DataFrame
        A copy of the cached table, so callers cannot mutate the stored one.

    Raises
    ------
    ValueError
        If :meth:`in_time_placebo` has not been run yet.
    """
    cached = self._in_time_df
    if cached is not None:
        return cached.copy()
    raise ValueError("No in-time placebo results yet; call in_time_placebo() first.")
[docs]
def get_in_time_placebo_gaps(self) -> pd.DataFrame:
    """
    Long-form in-time placebo gap paths, for the backdating overlay plot.

    One row per (placebo date, period) for every in-time refit whose gap path was
    stored by :meth:`in_time_placebo`. Only ``pre_periods`` are emitted, in
    order; a period missing from a gap path is skipped. Columns:
    ``placebo_period``, ``period``, ``gap``, ``phase`` (``"pre_fake"`` for periods
    before the placebo date, ``"post_fake"`` for the held-out window from it on).
    These per-period paths are panel-derived and are NOT retained after pickling.

    Returns
    -------
    pandas.DataFrame
        Empty (but with the four columns) when no refit produced a gap path.

    Raises
    ------
    ValueError
        If :meth:`in_time_placebo` has not been run, or if the gap paths were
        dropped on pickling (re-fit and re-run to recompute them).
    """
    if self._in_time_df is None:
        raise ValueError("No in-time placebo results yet; call in_time_placebo() first.")
    if self._in_time_gaps is None:
        raise ValueError(
            "In-time placebo gap paths are not retained after pickling "
            "(panel-derived); re-run in_time_placebo() on a freshly fitted result "
            "to recompute them."
        )

    pre = list(self.pre_periods)
    rows: List[Dict[str, Any]] = []
    for t_f, gap_path in self._in_time_gaps.items():
        # Position of the placebo date in the pre-window; periods at or after
        # it form the held-out ("post_fake") window.
        split = pre.index(t_f)
        # enumerate() yields each period's position directly, avoiding a linear
        # pre.index(period) scan per row.
        for position, period in enumerate(pre):
            if period not in gap_path:
                continue
            rows.append(
                {
                    "placebo_period": t_f,
                    "period": period,
                    "gap": gap_path[period],
                    "phase": "post_fake" if position >= split else "pre_fake",
                }
            )
    return pd.DataFrame(rows, columns=["placebo_period", "period", "gap", "phase"])