"""
Synthetic Difference-in-Differences estimator.
"""
import warnings
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
from numpy.linalg import LinAlgError
from diff_diff.estimators import DifferenceInDifferences
from diff_diff.linalg import solve_ols
from diff_diff.results import SyntheticDiDResults
from diff_diff.utils import (
_compute_regularization,
_sum_normalize,
compute_sdid_estimator,
compute_sdid_unit_weights,
compute_time_weights,
safe_inference,
validate_binary,
)
[docs]
class SyntheticDiD(DifferenceInDifferences):
"""
Synthetic Difference-in-Differences (SDID) estimator.
Combines the strengths of Difference-in-Differences and Synthetic Control
methods by re-weighting control units to better match treated units'
pre-treatment trends.
This method is particularly useful when:
- You have few treated units (possibly just one)
- Parallel trends assumption may be questionable
- Control units are heterogeneous and need reweighting
- You want robustness to pre-treatment differences
Parameters
----------
zeta_omega : float, optional
Regularization for unit weights. If None (default), auto-computed
from data as ``(N1 * T1)^(1/4) * noise_level`` matching R's synthdid.
zeta_lambda : float, optional
Regularization for time weights. If None (default), auto-computed
from data as ``1e-6 * noise_level`` matching R's synthdid.
alpha : float, default=0.05
Significance level for confidence intervals.
variance_method : str, default="placebo"
Method for variance estimation:
- "placebo": Placebo-based variance matching R's synthdid::vcov(method="placebo").
Implements Algorithm 4 from Arkhangelsky et al. (2021). This is R's default.
- "bootstrap": Bootstrap at unit level with fixed weights matching R's
synthdid::vcov(method="bootstrap").
n_bootstrap : int, default=200
Number of replications for variance estimation. Used for both:
- Bootstrap: Number of bootstrap samples
- Placebo: Number of random permutations (matches R's `replications` argument)
seed : int, optional
Random seed for reproducibility. If None (default), results
will vary between runs.
Attributes
----------
results_ : SyntheticDiDResults
Estimation results after calling fit().
is_fitted_ : bool
Whether the model has been fitted.
Examples
--------
Basic usage with panel data:
>>> import pandas as pd
>>> from diff_diff import SyntheticDiD
>>>
>>> # Panel data with units observed over multiple time periods
>>> # Treatment occurs at period 5 for treated units
>>> data = pd.DataFrame({
... 'unit': [...], # Unit identifier
... 'period': [...], # Time period
... 'outcome': [...], # Outcome variable
... 'treated': [...] # 1 if unit is ever treated, 0 otherwise
... })
>>>
>>> # Fit SDID model
>>> sdid = SyntheticDiD()
>>> results = sdid.fit(
... data,
... outcome='outcome',
... treatment='treated',
... unit='unit',
... time='period',
... post_periods=[5, 6, 7, 8]
... )
>>>
>>> # View results
>>> results.print_summary()
>>> print(f"ATT: {results.att:.3f} (SE: {results.se:.3f})")
>>>
>>> # Examine unit weights
>>> weights_df = results.get_unit_weights_df()
>>> print(weights_df.head(10))
Notes
-----
The SDID estimator (Arkhangelsky et al., 2021) computes:
τ̂ = (Ȳ_treated,post - Σ_t λ_t * Y_treated,t)
- Σ_j ω_j * (Ȳ_j,post - Σ_t λ_t * Y_j,t)
Where:
- ω_j are unit weights (sum to 1, non-negative)
- λ_t are time weights (sum to 1, non-negative)
Unit weights ω are chosen to match pre-treatment outcomes:
min ||Σ_j ω_j * Y_j,pre - Y_treated,pre||²
This interpolates between:
- Standard DiD (uniform weights): ω_j = 1/N_control
- Synthetic Control (exact matching): concentrated weights
References
----------
Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
(2021). Synthetic Difference-in-Differences. American Economic Review,
111(12), 4088-4118.
"""
[docs]
def __init__(
self,
zeta_omega: Optional[float] = None,
zeta_lambda: Optional[float] = None,
alpha: float = 0.05,
variance_method: str = "placebo",
n_bootstrap: int = 200,
seed: Optional[int] = None,
# Deprecated — accepted for backward compat, ignored with warning
lambda_reg: Optional[float] = None,
zeta: Optional[float] = None,
):
if lambda_reg is not None:
warnings.warn(
"lambda_reg is deprecated and ignored. Regularization is now "
"auto-computed from data. Use zeta_omega to override unit weight "
"regularization.",
DeprecationWarning,
stacklevel=2,
)
if zeta is not None:
warnings.warn(
"zeta is deprecated and ignored. Use zeta_lambda to override "
"time weight regularization.",
DeprecationWarning,
stacklevel=2,
)
super().__init__(robust=True, cluster=None, alpha=alpha)
self.zeta_omega = zeta_omega
self.zeta_lambda = zeta_lambda
self.variance_method = variance_method
self.n_bootstrap = n_bootstrap
self.seed = seed
# Validate n_bootstrap
if n_bootstrap < 2:
raise ValueError(
f"n_bootstrap must be >= 2 (got {n_bootstrap}). At least 2 "
f"iterations are needed to estimate standard errors."
)
# Validate variance_method
valid_methods = ("bootstrap", "placebo")
if variance_method not in valid_methods:
raise ValueError(
f"variance_method must be one of {valid_methods}, "
f"got '{variance_method}'"
)
self._unit_weights = None
self._time_weights = None
[docs]
def fit( # type: ignore[override]
self,
data: pd.DataFrame,
outcome: str,
treatment: str,
unit: str,
time: str,
post_periods: Optional[List[Any]] = None,
covariates: Optional[List[str]] = None
) -> SyntheticDiDResults:
"""
Fit the Synthetic Difference-in-Differences model.
Parameters
----------
data : pd.DataFrame
Panel data with observations for multiple units over multiple
time periods.
outcome : str
Name of the outcome variable column.
treatment : str
Name of the treatment group indicator column (0/1).
Should be 1 for all observations of treated units
(both pre and post treatment).
unit : str
Name of the unit identifier column.
time : str
Name of the time period column.
post_periods : list, optional
List of time period values that are post-treatment.
If None, uses the last half of periods.
covariates : list, optional
List of covariate column names. Covariates are residualized
out before computing the SDID estimator.
Returns
-------
SyntheticDiDResults
Object containing the ATT estimate, standard error,
unit weights, and time weights.
Raises
------
ValueError
If required parameters are missing or data validation fails.
"""
# Validate inputs
if outcome is None or treatment is None or unit is None or time is None:
raise ValueError(
"Must provide 'outcome', 'treatment', 'unit', and 'time'"
)
# Check columns exist
required_cols = [outcome, treatment, unit, time]
if covariates:
required_cols.extend(covariates)
missing = [c for c in required_cols if c not in data.columns]
if missing:
raise ValueError(f"Missing columns: {missing}")
# Validate treatment is binary
validate_binary(data[treatment].values, "treatment")
# Get all unique time periods
all_periods = sorted(data[time].unique())
if len(all_periods) < 2:
raise ValueError("Need at least 2 time periods")
# Determine pre and post periods
if post_periods is None:
mid = len(all_periods) // 2
post_periods = list(all_periods[mid:])
pre_periods = list(all_periods[:mid])
else:
post_periods = list(post_periods)
pre_periods = [p for p in all_periods if p not in post_periods]
if len(post_periods) == 0:
raise ValueError("Must have at least one post-treatment period")
if len(pre_periods) == 0:
raise ValueError("Must have at least one pre-treatment period")
# Validate post_periods are in data
for p in post_periods:
if p not in all_periods:
raise ValueError(f"Post-period '{p}' not found in time column")
# Identify treated and control units
# Treatment indicator should be constant within unit
unit_treatment = data.groupby(unit)[treatment].first()
# Validate treatment is constant within unit (SDID requires block treatment)
treatment_nunique = data.groupby(unit)[treatment].nunique()
varying_units = treatment_nunique[treatment_nunique > 1]
if len(varying_units) > 0:
example_unit = varying_units.index[0]
example_vals = sorted(
data.loc[data[unit] == example_unit, treatment].unique()
)
raise ValueError(
f"Treatment indicator varies within {len(varying_units)} unit(s) "
f"(e.g., unit '{example_unit}' has values {example_vals}). "
f"SyntheticDiD requires 'block' treatment where treatment is "
f"constant within each unit across all time periods. "
f"For staggered adoption designs, use CallawaySantAnna or "
f"ImputationDiD instead."
)
treated_units = unit_treatment[unit_treatment == 1].index.tolist()
control_units = unit_treatment[unit_treatment == 0].index.tolist()
if len(treated_units) == 0:
raise ValueError("No treated units found")
if len(control_units) == 0:
raise ValueError("No control units found")
# Validate balanced panel (SDID requires all units observed in all periods)
periods_per_unit = data.groupby(unit)[time].nunique()
expected_n_periods = len(all_periods)
unbalanced_units = periods_per_unit[periods_per_unit != expected_n_periods]
if len(unbalanced_units) > 0:
example_unit = unbalanced_units.index[0]
actual_count = unbalanced_units.iloc[0]
raise ValueError(
f"Panel is not balanced: {len(unbalanced_units)} unit(s) do not "
f"have observations in all {expected_n_periods} periods "
f"(e.g., unit '{example_unit}' has {actual_count} periods). "
f"SyntheticDiD requires a balanced panel. Use "
f"diff_diff.prep.balance_panel() to balance the panel first."
)
# Residualize covariates if provided
working_data = data.copy()
if covariates:
working_data = self._residualize_covariates(
working_data, outcome, covariates, unit, time
)
# Create outcome matrices
# Shape: (n_periods, n_units)
Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated = \
self._create_outcome_matrices(
working_data, outcome, unit, time,
pre_periods, post_periods, treated_units, control_units
)
# Compute auto-regularization (or use user overrides)
auto_zeta_omega, auto_zeta_lambda = _compute_regularization(
Y_pre_control, len(treated_units), len(post_periods)
)
zeta_omega = self.zeta_omega if self.zeta_omega is not None else auto_zeta_omega
zeta_lambda = self.zeta_lambda if self.zeta_lambda is not None else auto_zeta_lambda
# Store noise level for diagnostics
from diff_diff.utils import _compute_noise_level
noise_level = _compute_noise_level(Y_pre_control)
# Data-dependent convergence threshold (matches R's 1e-5 * noise.level).
# Floor of 1e-5 when noise_level == 0: R would use 0.0, causing FW to
# run all max_iter iterations. The result is equivalent (zero-noise
# data has no variation to optimize), but the floor enables early stop.
min_decrease = 1e-5 * noise_level if noise_level > 0 else 1e-5
# Compute unit weights (Frank-Wolfe with sparsification)
Y_pre_treated_mean = np.mean(Y_pre_treated, axis=1)
unit_weights = compute_sdid_unit_weights(
Y_pre_control,
Y_pre_treated_mean,
zeta_omega=zeta_omega,
min_decrease=min_decrease,
)
# Compute time weights (Frank-Wolfe on collapsed form)
time_weights = compute_time_weights(
Y_pre_control,
Y_post_control,
zeta_lambda=zeta_lambda,
min_decrease=min_decrease,
)
# Compute SDID estimate
Y_post_treated_mean = np.mean(Y_post_treated, axis=1)
att = compute_sdid_estimator(
Y_pre_control,
Y_post_control,
Y_pre_treated_mean,
Y_post_treated_mean,
unit_weights,
time_weights
)
# Compute pre-treatment fit (RMSE)
synthetic_pre = Y_pre_control @ unit_weights
pre_fit_rmse = np.sqrt(np.mean((Y_pre_treated_mean - synthetic_pre) ** 2))
# Warn if pre-treatment fit is poor (Registry requirement).
# Threshold: 1× SD of treated pre-treatment outcomes — a natural baseline
# since RMSE exceeding natural variation indicates the synthetic control
# fails to reproduce the treated series' level or trend.
pre_treatment_sd = np.std(Y_pre_treated_mean, ddof=1) if len(Y_pre_treated_mean) > 1 else 0.0
if pre_treatment_sd > 0 and pre_fit_rmse > pre_treatment_sd:
warnings.warn(
f"Pre-treatment fit is poor: RMSE ({pre_fit_rmse:.4f}) exceeds "
f"the standard deviation of treated pre-treatment outcomes "
f"({pre_treatment_sd:.4f}). The synthetic control may not "
f"adequately reproduce treated unit trends. Consider adding "
f"more control units or adjusting regularization.",
UserWarning,
stacklevel=2,
)
# Compute standard errors based on variance_method
if self.variance_method == "bootstrap":
se, bootstrap_estimates = self._bootstrap_se(
Y_pre_control, Y_post_control,
Y_pre_treated, Y_post_treated,
unit_weights, time_weights,
)
placebo_effects = bootstrap_estimates
inference_method = "bootstrap"
else:
# Use placebo-based variance (R's synthdid Algorithm 4)
se, placebo_effects = self._placebo_variance_se(
Y_pre_control,
Y_post_control,
Y_pre_treated_mean,
Y_post_treated_mean,
n_treated=len(treated_units),
zeta_omega=zeta_omega,
zeta_lambda=zeta_lambda,
min_decrease=min_decrease,
replications=self.n_bootstrap # Reuse n_bootstrap for replications
)
inference_method = "placebo"
# Compute test statistics
t_stat, p_value_analytical, conf_int = safe_inference(att, se, alpha=self.alpha)
if len(placebo_effects) > 0 and np.isfinite(t_stat):
p_value = max(
np.mean(np.abs(placebo_effects) >= np.abs(att)),
1.0 / (len(placebo_effects) + 1),
)
else:
p_value = p_value_analytical
# Create weight dictionaries
unit_weights_dict = {
unit_id: w for unit_id, w in zip(control_units, unit_weights)
}
time_weights_dict = {
period: w for period, w in zip(pre_periods, time_weights)
}
# Store results
self.results_ = SyntheticDiDResults(
att=att,
se=se,
t_stat=t_stat,
p_value=p_value,
conf_int=conf_int,
n_obs=len(data),
n_treated=len(treated_units),
n_control=len(control_units),
unit_weights=unit_weights_dict,
time_weights=time_weights_dict,
pre_periods=pre_periods,
post_periods=post_periods,
alpha=self.alpha,
variance_method=inference_method,
noise_level=noise_level,
zeta_omega=zeta_omega,
zeta_lambda=zeta_lambda,
pre_treatment_fit=pre_fit_rmse,
placebo_effects=placebo_effects if len(placebo_effects) > 0 else None,
n_bootstrap=self.n_bootstrap if inference_method == "bootstrap" else None
)
self._unit_weights = unit_weights
self._time_weights = time_weights
self.is_fitted_ = True
return self.results_
def _create_outcome_matrices(
self,
data: pd.DataFrame,
outcome: str,
unit: str,
time: str,
pre_periods: List[Any],
post_periods: List[Any],
treated_units: List[Any],
control_units: List[Any]
) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
"""
Create outcome matrices for SDID estimation.
Returns
-------
tuple
(Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated)
Each is a 2D array with shape (n_periods, n_units)
"""
# Pivot data to wide format
pivot = data.pivot(index=time, columns=unit, values=outcome)
# Extract submatrices
Y_pre_control = pivot.loc[pre_periods, control_units].values
Y_post_control = pivot.loc[post_periods, control_units].values
Y_pre_treated = pivot.loc[pre_periods, treated_units].values
Y_post_treated = pivot.loc[post_periods, treated_units].values
return (
Y_pre_control.astype(float),
Y_post_control.astype(float),
Y_pre_treated.astype(float),
Y_post_treated.astype(float)
)
def _residualize_covariates(
self,
data: pd.DataFrame,
outcome: str,
covariates: List[str],
unit: str,
time: str
) -> pd.DataFrame:
"""
Residualize outcome by regressing out covariates.
Uses two-way fixed effects to partial out covariates.
"""
data = data.copy()
# Create design matrix with covariates
X = data[covariates].values.astype(float)
# Add unit and time dummies
unit_dummies = pd.get_dummies(data[unit], prefix='u', drop_first=True)
time_dummies = pd.get_dummies(data[time], prefix='t', drop_first=True)
X_full = np.column_stack([
np.ones(len(data)),
X,
unit_dummies.values,
time_dummies.values
])
y = data[outcome].values.astype(float)
# Fit and get residuals using unified backend
coeffs, residuals, _ = solve_ols(X_full, y, return_vcov=False)
# Add back the mean for interpretability
data[outcome] = residuals + np.mean(y)
return data
def _bootstrap_se(
self,
Y_pre_control: np.ndarray,
Y_post_control: np.ndarray,
Y_pre_treated: np.ndarray,
Y_post_treated: np.ndarray,
unit_weights: np.ndarray,
time_weights: np.ndarray,
) -> Tuple[float, np.ndarray]:
"""Compute bootstrap standard error matching R's synthdid bootstrap_sample.
Resamples all units (control + treated) with replacement, renormalizes
original unit weights for the resampled controls, and computes the
SDID estimator with **fixed** weights (no re-estimation).
This matches R's ``synthdid::vcov(method="bootstrap")``.
"""
rng = np.random.default_rng(self.seed)
n_control = Y_pre_control.shape[1]
n_treated = Y_pre_treated.shape[1]
n_total = n_control + n_treated
# Build full panel matrix: (n_pre+n_post, n_control+n_treated)
Y_full = np.block([
[Y_pre_control, Y_pre_treated],
[Y_post_control, Y_post_treated]
])
n_pre = Y_pre_control.shape[0]
bootstrap_estimates = []
for _ in range(self.n_bootstrap):
# Resample ALL units with replacement
boot_idx = rng.choice(n_total, size=n_total, replace=True)
# Identify which resampled units are control vs treated
boot_is_control = boot_idx < n_control
boot_control_idx = boot_idx[boot_is_control]
boot_treated_idx = boot_idx[~boot_is_control]
# Skip if no control or no treated units in bootstrap sample
if len(boot_control_idx) == 0 or len(boot_treated_idx) == 0:
continue
try:
# Renormalize original unit weights for the resampled controls
boot_omega = _sum_normalize(unit_weights[boot_control_idx])
# Extract resampled outcome matrices
Y_boot = Y_full[:, boot_idx]
Y_boot_pre_c = Y_boot[:n_pre, boot_is_control]
Y_boot_post_c = Y_boot[n_pre:, boot_is_control]
Y_boot_pre_t = Y_boot[:n_pre, ~boot_is_control]
Y_boot_post_t = Y_boot[n_pre:, ~boot_is_control]
# Compute ATT with FIXED weights (do NOT re-estimate)
Y_boot_pre_t_mean = np.mean(Y_boot_pre_t, axis=1)
Y_boot_post_t_mean = np.mean(Y_boot_post_t, axis=1)
tau = compute_sdid_estimator(
Y_boot_pre_c, Y_boot_post_c,
Y_boot_pre_t_mean, Y_boot_post_t_mean,
boot_omega, time_weights # time_weights = original lambda
)
if np.isfinite(tau):
bootstrap_estimates.append(tau)
except (ValueError, LinAlgError):
continue
bootstrap_estimates = np.array(bootstrap_estimates)
# Check bootstrap success rate and handle failures
n_successful = len(bootstrap_estimates)
failure_rate = 1 - (n_successful / self.n_bootstrap)
if n_successful == 0:
raise ValueError(
f"All {self.n_bootstrap} bootstrap iterations failed. "
f"This typically occurs when:\n"
f" - Sample size is too small for reliable resampling\n"
f" - Weight matrices are singular or near-singular\n"
f" - Insufficient pre-treatment periods for weight estimation\n"
f" - Too few control units relative to treated units\n"
f"Consider using variance_method='placebo' or increasing "
f"the regularization parameters (zeta_omega, zeta_lambda)."
)
elif n_successful == 1:
warnings.warn(
f"Only 1/{self.n_bootstrap} bootstrap iteration succeeded. "
f"Standard error cannot be computed reliably (requires at least 2). "
f"Returning SE=0.0. Consider using variance_method='placebo' or "
f"increasing the regularization (zeta_omega, zeta_lambda).",
UserWarning,
stacklevel=2,
)
se = 0.0
elif failure_rate > 0.05:
warnings.warn(
f"Only {n_successful}/{self.n_bootstrap} bootstrap iterations succeeded "
f"({failure_rate:.1%} failure rate). Standard errors may be unreliable. "
f"This can occur with small samples or insufficient pre-treatment periods.",
UserWarning,
stacklevel=2,
)
se = float(np.std(bootstrap_estimates, ddof=1))
else:
se = float(np.std(bootstrap_estimates, ddof=1))
return se, bootstrap_estimates
def _placebo_variance_se(
self,
Y_pre_control: np.ndarray,
Y_post_control: np.ndarray,
Y_pre_treated_mean: np.ndarray,
Y_post_treated_mean: np.ndarray,
n_treated: int,
zeta_omega: float = 0.0,
zeta_lambda: float = 0.0,
min_decrease: float = 1e-5,
replications: int = 200
) -> Tuple[float, np.ndarray]:
"""
Compute placebo-based variance matching R's synthdid methodology.
This implements Algorithm 4 from Arkhangelsky et al. (2021),
matching R's synthdid::vcov(method = "placebo"):
1. Randomly sample N₀ control indices (permutation)
2. Designate last N₁ as pseudo-treated, first (N₀-N₁) as pseudo-controls
3. Re-estimate both omega and lambda on the permuted data (from
uniform initialization, fresh start), matching R's behavior where
``update.omega=TRUE, update.lambda=TRUE`` are passed via ``opts``
4. Compute SDID estimate with re-estimated weights
5. Repeat `replications` times
6. SE = sqrt((r-1)/r) * sd(estimates)
Parameters
----------
Y_pre_control : np.ndarray
Control outcomes in pre-treatment periods, shape (n_pre, n_control).
Y_post_control : np.ndarray
Control outcomes in post-treatment periods, shape (n_post, n_control).
Y_pre_treated_mean : np.ndarray
Mean treated outcomes in pre-treatment periods, shape (n_pre,).
Y_post_treated_mean : np.ndarray
Mean treated outcomes in post-treatment periods, shape (n_post,).
n_treated : int
Number of treated units in the original estimation.
zeta_omega : float
Regularization parameter for unit weights (for re-estimation).
zeta_lambda : float
Regularization parameter for time weights (for re-estimation).
min_decrease : float
Convergence threshold for Frank-Wolfe (for re-estimation).
replications : int, default=200
Number of placebo replications.
Returns
-------
tuple
(se, placebo_effects) where se is the standard error and
placebo_effects is the array of placebo treatment effects.
References
----------
Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
(2021). Synthetic Difference-in-Differences. American Economic Review,
111(12), 4088-4118. Algorithm 4.
"""
rng = np.random.default_rng(self.seed)
n_pre, n_control = Y_pre_control.shape
# Ensure we have enough controls for the split
n_pseudo_control = n_control - n_treated
if n_pseudo_control < 1:
warnings.warn(
f"Not enough control units ({n_control}) for placebo variance "
f"estimation with {n_treated} treated units. "
f"Consider using variance_method='bootstrap'.",
UserWarning,
stacklevel=3,
)
return 0.0, np.array([])
placebo_estimates = []
for _ in range(replications):
try:
# Random permutation of control indices (Algorithm 4, step 1)
perm = rng.permutation(n_control)
# Split into pseudo-controls and pseudo-treated (step 2)
pseudo_control_idx = perm[:n_pseudo_control]
pseudo_treated_idx = perm[n_pseudo_control:]
# Get pseudo-control and pseudo-treated outcomes
Y_pre_pseudo_control = Y_pre_control[:, pseudo_control_idx]
Y_post_pseudo_control = Y_post_control[:, pseudo_control_idx]
Y_pre_pseudo_treated_mean = np.mean(
Y_pre_control[:, pseudo_treated_idx], axis=1
)
Y_post_pseudo_treated_mean = np.mean(
Y_post_control[:, pseudo_treated_idx], axis=1
)
# Re-estimate weights on permuted data (matching R's behavior)
# R passes update.omega=TRUE, update.lambda=TRUE via opts,
# re-estimating weights from uniform initialization (fresh start).
# Unit weights: re-estimate on pseudo-control/pseudo-treated data
pseudo_omega = compute_sdid_unit_weights(
Y_pre_pseudo_control,
Y_pre_pseudo_treated_mean,
zeta_omega=zeta_omega,
min_decrease=min_decrease,
)
# Time weights: re-estimate on pseudo-control data
pseudo_lambda = compute_time_weights(
Y_pre_pseudo_control,
Y_post_pseudo_control,
zeta_lambda=zeta_lambda,
min_decrease=min_decrease,
)
# Compute placebo SDID estimate (step 4)
tau = compute_sdid_estimator(
Y_pre_pseudo_control,
Y_post_pseudo_control,
Y_pre_pseudo_treated_mean,
Y_post_pseudo_treated_mean,
pseudo_omega,
pseudo_lambda
)
if np.isfinite(tau):
placebo_estimates.append(tau)
except (ValueError, LinAlgError, ZeroDivisionError):
# Skip failed iterations
continue
placebo_estimates = np.array(placebo_estimates)
n_successful = len(placebo_estimates)
if n_successful < 2:
warnings.warn(
f"Only {n_successful} placebo replications completed successfully. "
f"Standard error cannot be estimated reliably. "
f"Consider using variance_method='bootstrap' or increasing "
f"the number of control units.",
UserWarning,
stacklevel=3,
)
return 0.0, placebo_estimates
# Warn if many replications failed
failure_rate = 1 - (n_successful / replications)
if failure_rate > 0.05:
warnings.warn(
f"Only {n_successful}/{replications} placebo replications succeeded "
f"({failure_rate:.1%} failure rate). Standard errors may be unreliable.",
UserWarning,
stacklevel=3,
)
# Compute SE using R's formula: sqrt((r-1)/r) * sd(estimates)
# This matches synthdid::vcov.R exactly
se = np.sqrt((n_successful - 1) / n_successful) * np.std(
placebo_estimates, ddof=1
)
return se, placebo_estimates
[docs]
def get_params(self) -> Dict[str, Any]:
"""Get estimator parameters."""
return {
"zeta_omega": self.zeta_omega,
"zeta_lambda": self.zeta_lambda,
"alpha": self.alpha,
"variance_method": self.variance_method,
"n_bootstrap": self.n_bootstrap,
"seed": self.seed,
}
[docs]
def set_params(self, **params) -> "SyntheticDiD":
"""Set estimator parameters."""
# Deprecated parameter names — emit warning and ignore
_deprecated = {"lambda_reg", "zeta"}
for key, value in params.items():
if key in _deprecated:
warnings.warn(
f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda "
f"instead.",
DeprecationWarning,
stacklevel=2,
)
elif hasattr(self, key):
setattr(self, key, value)
else:
raise ValueError(f"Unknown parameter: {key}")
return self