Source code for diff_diff.synthetic_did

"""
Synthetic Difference-in-Differences estimator.
"""

import warnings
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
from numpy.linalg import LinAlgError

from diff_diff.estimators import DifferenceInDifferences
from diff_diff.linalg import solve_ols
from diff_diff.results import SyntheticDiDResults
from diff_diff.utils import (
    _compute_regularization,
    _sum_normalize,
    compute_sdid_estimator,
    compute_sdid_unit_weights,
    compute_time_weights,
    safe_inference,
    validate_binary,
)



[docs]
class SyntheticDiD(DifferenceInDifferences):
    """
    Synthetic Difference-in-Differences (SDID) estimator.

    Combines the strengths of Difference-in-Differences and Synthetic Control
    methods by re-weighting control units to better match treated units'
    pre-treatment trends.

    This method is particularly useful when:
    - You have few treated units (possibly just one)
    - Parallel trends assumption may be questionable
    - Control units are heterogeneous and need reweighting
    - You want robustness to pre-treatment differences

    Parameters
    ----------
    zeta_omega : float, optional
        Regularization for unit weights. If None (default), auto-computed
        from data as ``(N1 * T1)^(1/4) * noise_level`` matching R's synthdid.
    zeta_lambda : float, optional
        Regularization for time weights. If None (default), auto-computed
        from data as ``1e-6 * noise_level`` matching R's synthdid.
    alpha : float, default=0.05
        Significance level for confidence intervals.
    variance_method : str, default="placebo"
        Method for variance estimation:
        - "placebo": Placebo-based variance matching R's synthdid::vcov(method="placebo").
          Implements Algorithm 4 from Arkhangelsky et al. (2021). This is R's default.
        - "bootstrap": Bootstrap at unit level with fixed weights matching R's
          synthdid::vcov(method="bootstrap").
    n_bootstrap : int, default=200
        Number of replications for variance estimation. Used for both:
        - Bootstrap: Number of bootstrap samples
        - Placebo: Number of random permutations (matches R's `replications` argument)
    seed : int, optional
        Random seed for reproducibility. If None (default), results
        will vary between runs.

    Attributes
    ----------
    results_ : SyntheticDiDResults
        Estimation results after calling fit().
    is_fitted_ : bool
        Whether the model has been fitted.

    Examples
    --------
    Basic usage with panel data:

    >>> import pandas as pd
    >>> from diff_diff import SyntheticDiD
    >>>
    >>> # Panel data with units observed over multiple time periods
    >>> # Treatment occurs at period 5 for treated units
    >>> data = pd.DataFrame({
    ...     'unit': [...],      # Unit identifier
    ...     'period': [...],    # Time period
    ...     'outcome': [...],   # Outcome variable
    ...     'treated': [...]    # 1 if unit is ever treated, 0 otherwise
    ... })
    >>>
    >>> # Fit SDID model
    >>> sdid = SyntheticDiD()
    >>> results = sdid.fit(
    ...     data,
    ...     outcome='outcome',
    ...     treatment='treated',
    ...     unit='unit',
    ...     time='period',
    ...     post_periods=[5, 6, 7, 8]
    ... )
    >>>
    >>> # View results
    >>> results.print_summary()
    >>> print(f"ATT: {results.att:.3f} (SE: {results.se:.3f})")
    >>>
    >>> # Examine unit weights
    >>> weights_df = results.get_unit_weights_df()
    >>> print(weights_df.head(10))

    Notes
    -----
    The SDID estimator (Arkhangelsky et al., 2021) computes:

        τ̂ = (Ȳ_treated,post - Σ_t λ_t * Y_treated,t)
            - Σ_j ω_j * (Ȳ_j,post - Σ_t λ_t * Y_j,t)

    Where:
    - ω_j are unit weights (sum to 1, non-negative)
    - λ_t are time weights (sum to 1, non-negative)

    Unit weights ω are chosen to match pre-treatment outcomes:
        min ||Σ_j ω_j * Y_j,pre - Y_treated,pre||²

    This interpolates between:
    - Standard DiD (uniform weights): ω_j = 1/N_control
    - Synthetic Control (exact matching): concentrated weights

    References
    ----------
    Arkhangelsky, D., Athey, S., Hirshberg, D. A., Imbens, G. W., & Wager, S.
    (2021). Synthetic Difference-in-Differences. American Economic Review,
    111(12), 4088-4118.
    """


[docs]
    def __init__(
        self,
        zeta_omega: Optional[float] = None,
        zeta_lambda: Optional[float] = None,
        alpha: float = 0.05,
        variance_method: str = "placebo",
        n_bootstrap: int = 200,
        seed: Optional[int] = None,
        # Deprecated — accepted for backward compat, ignored with warning
        lambda_reg: Optional[float] = None,
        zeta: Optional[float] = None,
    ):
        """
        Store the estimator configuration; no data is touched here.

        ``lambda_reg`` and ``zeta`` are legacy arguments: a non-None value
        only triggers a ``DeprecationWarning`` and is otherwise discarded.

        Raises
        ------
        ValueError
            If ``n_bootstrap`` is below 2, or ``variance_method`` is neither
            ``"bootstrap"`` nor ``"placebo"``.
        """
        # Each legacy argument is paired with the notice shown when it is used.
        legacy_notices = (
            (
                lambda_reg,
                "lambda_reg is deprecated and ignored. Regularization is now "
                "auto-computed from data. Use zeta_omega to override unit weight "
                "regularization.",
            ),
            (
                zeta,
                "zeta is deprecated and ignored. Use zeta_lambda to override "
                "time weight regularization.",
            ),
        )
        for legacy_value, notice in legacy_notices:
            if legacy_value is not None:
                # stacklevel=2 attributes the warning to the code constructing
                # the estimator rather than to this method.
                warnings.warn(notice, DeprecationWarning, stacklevel=2)

        super().__init__(robust=True, cluster=None, alpha=alpha)

        self.zeta_omega = zeta_omega
        self.zeta_lambda = zeta_lambda
        self.variance_method = variance_method
        self.n_bootstrap = n_bootstrap
        self.seed = seed

        # A standard deviation needs at least two replications.
        if n_bootstrap < 2:
            raise ValueError(
                f"n_bootstrap must be >= 2 (got {n_bootstrap}). At least 2 "
                f"iterations are needed to estimate standard errors."
            )

        valid_methods = ("bootstrap", "placebo")
        if variance_method not in valid_methods:
            raise ValueError(
                f"variance_method must be one of {valid_methods}, "
                f"got '{variance_method}'"
            )

        # Filled in by fit().
        self._unit_weights = None
        self._time_weights = None



[docs]
    def fit(  # type: ignore[override]
        self,
        data: pd.DataFrame,
        outcome: str,
        treatment: str,
        unit: str,
        time: str,
        post_periods: Optional[List[Any]] = None,
        covariates: Optional[List[str]] = None
    ) -> SyntheticDiDResults:
        """
        Fit the Synthetic Difference-in-Differences model.

        Parameters
        ----------
        data : pd.DataFrame
            Panel data with observations for multiple units over multiple
            time periods. Must be balanced (every unit observed in every
            period).
        outcome : str
            Name of the outcome variable column.
        treatment : str
            Name of the treatment group indicator column (0/1).
            Must be constant within each unit, i.e. 1 for all observations
            of treated units (both pre and post treatment).
        unit : str
            Name of the unit identifier column.
        time : str
            Name of the time period column.
        post_periods : list, optional
            List of time period values that are post-treatment.
            If None, the sorted periods are split at ``len(periods) // 2``:
            the earlier part is pre-treatment and the rest is
            post-treatment (the extra period goes to post when the count
            is odd).
        covariates : list, optional
            List of covariate column names. Covariates are residualized
            out before computing the SDID estimator.

        Returns
        -------
        SyntheticDiDResults
            Object containing the ATT estimate, standard error,
            unit weights, and time weights. The same object is stored
            on ``self.results_``.

        Raises
        ------
        ValueError
            If required parameters are missing, columns are absent, the
            treatment indicator varies within a unit, there are no treated
            or no control units, a post-period is not present in the data,
            or the panel is unbalanced.

        Warns
        -----
        UserWarning
            If the pre-treatment RMSE of the synthetic control exceeds the
            standard deviation of the treated pre-treatment series.

        Notes
        -----
        The permutation-style p-value ``mean(|draws| >= |att|)`` is only
        reported for ``variance_method="placebo"``, where the draws are
        estimates under no treatment. Bootstrap draws are centred on the
        estimate itself, so for ``variance_method="bootstrap"`` the
        analytical p-value returned by ``safe_inference`` is reported.
        """
        # Validate inputs
        if outcome is None or treatment is None or unit is None or time is None:
            raise ValueError(
                "Must provide 'outcome', 'treatment', 'unit', and 'time'"
            )

        # Check columns exist
        required_cols = [outcome, treatment, unit, time]
        if covariates:
            required_cols.extend(covariates)

        missing = [c for c in required_cols if c not in data.columns]
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        # Validate treatment is binary
        validate_binary(data[treatment].values, "treatment")

        # Get all unique time periods
        all_periods = sorted(data[time].unique())

        if len(all_periods) < 2:
            raise ValueError("Need at least 2 time periods")

        # Determine pre and post periods
        if post_periods is None:
            mid = len(all_periods) // 2
            post_periods = list(all_periods[mid:])
            pre_periods = list(all_periods[:mid])
        else:
            post_periods = list(post_periods)
            pre_periods = [p for p in all_periods if p not in post_periods]

        if len(post_periods) == 0:
            raise ValueError("Must have at least one post-treatment period")
        if len(pre_periods) == 0:
            raise ValueError("Must have at least one pre-treatment period")

        # Validate post_periods are in data
        for p in post_periods:
            if p not in all_periods:
                raise ValueError(f"Post-period '{p}' not found in time column")

        # Identify treated and control units
        # Treatment indicator should be constant within unit
        unit_treatment = data.groupby(unit)[treatment].first()

        # Validate treatment is constant within unit (SDID requires block treatment)
        treatment_nunique = data.groupby(unit)[treatment].nunique()
        varying_units = treatment_nunique[treatment_nunique > 1]
        if len(varying_units) > 0:
            example_unit = varying_units.index[0]
            example_vals = sorted(
                data.loc[data[unit] == example_unit, treatment].unique()
            )
            raise ValueError(
                f"Treatment indicator varies within {len(varying_units)} unit(s) "
                f"(e.g., unit '{example_unit}' has values {example_vals}). "
                f"SyntheticDiD requires 'block' treatment where treatment is "
                f"constant within each unit across all time periods. "
                f"For staggered adoption designs, use CallawaySantAnna or "
                f"ImputationDiD instead."
            )

        treated_units = unit_treatment[unit_treatment == 1].index.tolist()
        control_units = unit_treatment[unit_treatment == 0].index.tolist()

        if len(treated_units) == 0:
            raise ValueError("No treated units found")
        if len(control_units) == 0:
            raise ValueError("No control units found")

        # Validate balanced panel (SDID requires all units observed in all periods)
        periods_per_unit = data.groupby(unit)[time].nunique()
        expected_n_periods = len(all_periods)
        unbalanced_units = periods_per_unit[periods_per_unit != expected_n_periods]
        if len(unbalanced_units) > 0:
            example_unit = unbalanced_units.index[0]
            actual_count = unbalanced_units.iloc[0]
            raise ValueError(
                f"Panel is not balanced: {len(unbalanced_units)} unit(s) do not "
                f"have observations in all {expected_n_periods} periods "
                f"(e.g., unit '{example_unit}' has {actual_count} periods). "
                f"SyntheticDiD requires a balanced panel. Use "
                f"diff_diff.prep.balance_panel() to balance the panel first."
            )

        # Residualize covariates if provided. The residualizer works on its
        # own copy, and nothing below mutates the frame, so the caller's data
        # can be used directly when there are no covariates.
        if covariates:
            working_data = self._residualize_covariates(
                data, outcome, covariates, unit, time
            )
        else:
            working_data = data

        # Create outcome matrices
        # Shape: (n_periods, n_units)
        Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated = \
            self._create_outcome_matrices(
                working_data, outcome, unit, time,
                pre_periods, post_periods, treated_units, control_units
            )

        # Compute auto-regularization (or use user overrides)
        auto_zeta_omega, auto_zeta_lambda = _compute_regularization(
            Y_pre_control, len(treated_units), len(post_periods)
        )
        zeta_omega = self.zeta_omega if self.zeta_omega is not None else auto_zeta_omega
        zeta_lambda = self.zeta_lambda if self.zeta_lambda is not None else auto_zeta_lambda

        # Store noise level for diagnostics
        from diff_diff.utils import _compute_noise_level
        noise_level = _compute_noise_level(Y_pre_control)

        # Data-dependent convergence threshold (matches R's 1e-5 * noise.level).
        # Floor of 1e-5 when noise_level == 0: R would use 0.0, causing FW to
        # run all max_iter iterations.  The result is equivalent (zero-noise
        # data has no variation to optimize), but the floor enables early stop.
        min_decrease = 1e-5 * noise_level if noise_level > 0 else 1e-5

        # Compute unit weights (Frank-Wolfe with sparsification)
        Y_pre_treated_mean = np.mean(Y_pre_treated, axis=1)

        unit_weights = compute_sdid_unit_weights(
            Y_pre_control,
            Y_pre_treated_mean,
            zeta_omega=zeta_omega,
            min_decrease=min_decrease,
        )

        # Compute time weights (Frank-Wolfe on collapsed form)
        time_weights = compute_time_weights(
            Y_pre_control,
            Y_post_control,
            zeta_lambda=zeta_lambda,
            min_decrease=min_decrease,
        )

        # Compute SDID estimate
        Y_post_treated_mean = np.mean(Y_post_treated, axis=1)

        att = compute_sdid_estimator(
            Y_pre_control,
            Y_post_control,
            Y_pre_treated_mean,
            Y_post_treated_mean,
            unit_weights,
            time_weights
        )

        # Compute pre-treatment fit (RMSE)
        synthetic_pre = Y_pre_control @ unit_weights
        pre_fit_rmse = np.sqrt(np.mean((Y_pre_treated_mean - synthetic_pre) ** 2))

        # Warn if pre-treatment fit is poor (Registry requirement).
        # Threshold: 1× SD of treated pre-treatment outcomes — a natural baseline
        # since RMSE exceeding natural variation indicates the synthetic control
        # fails to reproduce the treated series' level or trend.
        pre_treatment_sd = np.std(Y_pre_treated_mean, ddof=1) if len(Y_pre_treated_mean) > 1 else 0.0
        if pre_treatment_sd > 0 and pre_fit_rmse > pre_treatment_sd:
            warnings.warn(
                f"Pre-treatment fit is poor: RMSE ({pre_fit_rmse:.4f}) exceeds "
                f"the standard deviation of treated pre-treatment outcomes "
                f"({pre_treatment_sd:.4f}). The synthetic control may not "
                f"adequately reproduce treated unit trends. Consider adding "
                f"more control units or adjusting regularization.",
                UserWarning,
                stacklevel=2,
            )

        # Compute standard errors based on variance_method
        if self.variance_method == "bootstrap":
            se, bootstrap_estimates = self._bootstrap_se(
                Y_pre_control, Y_post_control,
                Y_pre_treated, Y_post_treated,
                unit_weights, time_weights,
            )
            # The results object keeps the replication draws in a single
            # field regardless of which method produced them.
            placebo_effects = bootstrap_estimates
            inference_method = "bootstrap"
        else:
            # Use placebo-based variance (R's synthdid Algorithm 4)
            se, placebo_effects = self._placebo_variance_se(
                Y_pre_control,
                Y_post_control,
                Y_pre_treated_mean,
                Y_post_treated_mean,
                n_treated=len(treated_units),
                zeta_omega=zeta_omega,
                zeta_lambda=zeta_lambda,
                min_decrease=min_decrease,
                replications=self.n_bootstrap  # Reuse n_bootstrap for replications
            )
            inference_method = "placebo"

        # Compute test statistics
        t_stat, p_value_analytical, conf_int = safe_inference(att, se, alpha=self.alpha)

        # The share of draws at least as extreme as the estimate is a valid
        # p-value only when the draws come from a no-effect distribution.
        # Placebo draws do; bootstrap draws are centred on `att`, so that
        # share would hover near 0.5 whatever the effect size.
        use_empirical_p = (
            inference_method == "placebo"
            and len(placebo_effects) > 0
            and np.isfinite(t_stat)
        )
        if use_empirical_p:
            p_value = max(
                np.mean(np.abs(placebo_effects) >= np.abs(att)),
                # Floor: with r draws the smallest resolvable p is 1/(r+1).
                1.0 / (len(placebo_effects) + 1),
            )
        else:
            p_value = p_value_analytical

        # Create weight dictionaries
        unit_weights_dict = {
            unit_id: w for unit_id, w in zip(control_units, unit_weights)
        }
        time_weights_dict = {
            period: w for period, w in zip(pre_periods, time_weights)
        }

        # Store results
        self.results_ = SyntheticDiDResults(
            att=att,
            se=se,
            t_stat=t_stat,
            p_value=p_value,
            conf_int=conf_int,
            n_obs=len(data),
            n_treated=len(treated_units),
            n_control=len(control_units),
            unit_weights=unit_weights_dict,
            time_weights=time_weights_dict,
            pre_periods=pre_periods,
            post_periods=post_periods,
            alpha=self.alpha,
            variance_method=inference_method,
            noise_level=noise_level,
            zeta_omega=zeta_omega,
            zeta_lambda=zeta_lambda,
            pre_treatment_fit=pre_fit_rmse,
            placebo_effects=placebo_effects if len(placebo_effects) > 0 else None,
            n_bootstrap=self.n_bootstrap if inference_method == "bootstrap" else None
        )

        self._unit_weights = unit_weights
        self._time_weights = time_weights
        self.is_fitted_ = True

        return self.results_


    def _create_outcome_matrices(
        self,
        data: pd.DataFrame,
        outcome: str,
        unit: str,
        time: str,
        pre_periods: List[Any],
        post_periods: List[Any],
        treated_units: List[Any],
        control_units: List[Any]
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Create outcome matrices for SDID estimation.

        Returns
        -------
        tuple
            (Y_pre_control, Y_post_control, Y_pre_treated, Y_post_treated)
            Each is a 2D array with shape (n_periods, n_units)
        """
        # Pivot data to wide format
        pivot = data.pivot(index=time, columns=unit, values=outcome)

        # Extract submatrices
        Y_pre_control = pivot.loc[pre_periods, control_units].values
        Y_post_control = pivot.loc[post_periods, control_units].values
        Y_pre_treated = pivot.loc[pre_periods, treated_units].values
        Y_post_treated = pivot.loc[post_periods, treated_units].values

        return (
            Y_pre_control.astype(float),
            Y_post_control.astype(float),
            Y_pre_treated.astype(float),
            Y_post_treated.astype(float)
        )

    def _residualize_covariates(
        self,
        data: pd.DataFrame,
        outcome: str,
        covariates: List[str],
        unit: str,
        time: str
    ) -> pd.DataFrame:
        """
        Replace the outcome with its residual from a two-way fixed-effects fit.

        The outcome is regressed on an intercept, the covariates, unit
        dummies and time dummies (first level of each dropped). The returned
        frame is a copy of ``data`` whose outcome column holds the regression
        residuals shifted by the original outcome mean; the input frame is
        not modified.
        """
        frame = data.copy()

        covariate_block = frame[covariates].values.astype(float)

        # Fixed-effect indicators; drop_first avoids collinearity with the
        # intercept column.
        unit_fe = pd.get_dummies(frame[unit], prefix='u', drop_first=True).values
        time_fe = pd.get_dummies(frame[time], prefix='t', drop_first=True).values

        design = np.column_stack([
            np.ones(len(frame)),
            covariate_block,
            unit_fe,
            time_fe,
        ])
        response = frame[outcome].values.astype(float)

        # Only the residuals are needed from the shared OLS backend.
        _, resid, _ = solve_ols(design, response, return_vcov=False)

        # Re-centre on the original mean so the outcome stays on its own scale.
        frame[outcome] = resid + np.mean(response)
        return frame

    def _bootstrap_se(
        self,
        Y_pre_control: np.ndarray,
        Y_post_control: np.ndarray,
        Y_pre_treated: np.ndarray,
        Y_post_treated: np.ndarray,
        unit_weights: np.ndarray,
        time_weights: np.ndarray,
    ) -> Tuple[float, np.ndarray]:
        """Compute bootstrap standard error matching R's synthdid bootstrap_sample.

        Resamples all units (control + treated) with replacement, renormalizes
        original unit weights for the resampled controls, and computes the
        SDID estimator with **fixed** weights (no re-estimation).

        The standard error is ``sqrt((r-1)/r) * sd(estimates)`` with ``r`` the
        number of successful draws, the same scaling used by
        ``_placebo_variance_se`` and by R's ``synthdid::vcov(method="bootstrap")``.

        Parameters
        ----------
        Y_pre_control, Y_post_control : np.ndarray
            Control outcomes, shape (n_pre, n_control) / (n_post, n_control).
        Y_pre_treated, Y_post_treated : np.ndarray
            Treated outcomes, shape (n_pre, n_treated) / (n_post, n_treated).
        unit_weights : np.ndarray
            Fitted control-unit weights, one per control column.
        time_weights : np.ndarray
            Fitted pre-period weights; used unchanged in every draw.

        Returns
        -------
        tuple
            (se, bootstrap_estimates). ``se`` is 0.0 when only one draw
            succeeded.

        Raises
        ------
        ValueError
            If no bootstrap draw produced a finite estimate.

        Warns
        -----
        UserWarning
            If only one draw succeeded, or more than 5% of draws failed.
        """
        rng = np.random.default_rng(self.seed)
        n_control = Y_pre_control.shape[1]
        n_treated = Y_pre_treated.shape[1]
        n_total = n_control + n_treated

        # Build full panel matrix: (n_pre+n_post, n_control+n_treated)
        # Controls occupy the first n_control columns.
        Y_full = np.block([
            [Y_pre_control, Y_pre_treated],
            [Y_post_control, Y_post_treated]
        ])
        n_pre = Y_pre_control.shape[0]

        bootstrap_estimates = []

        for _ in range(self.n_bootstrap):
            # Resample ALL units with replacement
            boot_idx = rng.choice(n_total, size=n_total, replace=True)

            # Identify which resampled units are control vs treated
            boot_is_control = boot_idx < n_control
            boot_control_idx = boot_idx[boot_is_control]
            boot_treated_idx = boot_idx[~boot_is_control]

            # Skip if no control or no treated units in bootstrap sample
            if len(boot_control_idx) == 0 or len(boot_treated_idx) == 0:
                continue

            try:
                # Renormalize original unit weights for the resampled controls
                boot_omega = _sum_normalize(unit_weights[boot_control_idx])

                # Extract resampled outcome matrices
                Y_boot = Y_full[:, boot_idx]
                Y_boot_pre_c = Y_boot[:n_pre, boot_is_control]
                Y_boot_post_c = Y_boot[n_pre:, boot_is_control]
                Y_boot_pre_t = Y_boot[:n_pre, ~boot_is_control]
                Y_boot_post_t = Y_boot[n_pre:, ~boot_is_control]

                # Compute ATT with FIXED weights (do NOT re-estimate)
                Y_boot_pre_t_mean = np.mean(Y_boot_pre_t, axis=1)
                Y_boot_post_t_mean = np.mean(Y_boot_post_t, axis=1)

                tau = compute_sdid_estimator(
                    Y_boot_pre_c, Y_boot_post_c,
                    Y_boot_pre_t_mean, Y_boot_post_t_mean,
                    boot_omega, time_weights  # time_weights = original lambda
                )
                if np.isfinite(tau):
                    bootstrap_estimates.append(tau)

            except (ValueError, LinAlgError):
                continue

        bootstrap_estimates = np.array(bootstrap_estimates)

        # Check bootstrap success rate and handle failures
        n_successful = len(bootstrap_estimates)
        failure_rate = 1 - (n_successful / self.n_bootstrap)

        if n_successful == 0:
            raise ValueError(
                f"All {self.n_bootstrap} bootstrap iterations failed. "
                f"This typically occurs when:\n"
                f"  - Sample size is too small for reliable resampling\n"
                f"  - Weight matrices are singular or near-singular\n"
                f"  - Insufficient pre-treatment periods for weight estimation\n"
                f"  - Too few control units relative to treated units\n"
                f"Consider using variance_method='placebo' or increasing "
                f"the regularization parameters (zeta_omega, zeta_lambda)."
            )

        if n_successful == 1:
            warnings.warn(
                f"Only 1/{self.n_bootstrap} bootstrap iteration succeeded. "
                f"Standard error cannot be computed reliably (requires at least 2). "
                f"Returning SE=0.0. Consider using variance_method='placebo' or "
                f"increasing the regularization (zeta_omega, zeta_lambda).",
                UserWarning,
                stacklevel=2,
            )
            return 0.0, bootstrap_estimates

        if failure_rate > 0.05:
            warnings.warn(
                f"Only {n_successful}/{self.n_bootstrap} bootstrap iterations succeeded "
                f"({failure_rate:.1%} failure rate). Standard errors may be unreliable. "
                f"This can occur with small samples or insufficient pre-treatment periods.",
                UserWarning,
                stacklevel=2,
            )

        # R's formula: sqrt((r-1)/r) * sd(estimates), identical to the scaling
        # applied in _placebo_variance_se so both variance methods agree.
        se = float(
            np.sqrt((n_successful - 1) / n_successful)
            * np.std(bootstrap_estimates, ddof=1)
        )

        return se, bootstrap_estimates

    def _placebo_variance_se(
        self,
        Y_pre_control: np.ndarray,
        Y_post_control: np.ndarray,
        Y_pre_treated_mean: np.ndarray,
        Y_post_treated_mean: np.ndarray,
        n_treated: int,
        zeta_omega: float = 0.0,
        zeta_lambda: float = 0.0,
        min_decrease: float = 1e-5,
        replications: int = 200
    ) -> Tuple[float, np.ndarray]:
        """
        Placebo standard error (Algorithm 4, Arkhangelsky et al. 2021).

        Mirrors R's ``synthdid::vcov(method = "placebo")``. Each replication:

        1. Shuffles the control columns.
        2. Treats the last ``n_treated`` shuffled controls as pseudo-treated
           and the remaining ones as pseudo-controls.
        3. Re-estimates unit and time weights on that split.
        4. Computes the SDID estimate with the re-estimated weights.

        Replications that raise or yield a non-finite estimate are dropped.
        The result is ``sqrt((r-1)/r) * sd(estimates)`` over the ``r``
        surviving replications.

        Parameters
        ----------
        Y_pre_control : np.ndarray
            Control outcomes in pre-treatment periods, shape (n_pre, n_control).
        Y_post_control : np.ndarray
            Control outcomes in post-treatment periods, shape (n_post, n_control).
        Y_pre_treated_mean : np.ndarray
            Mean treated outcomes in pre-treatment periods; not read by this
            method, which only uses control data.
        Y_post_treated_mean : np.ndarray
            Mean treated outcomes in post-treatment periods; likewise unused.
        n_treated : int
            Number of treated units in the original estimation; sets the
            size of the pseudo-treated group.
        zeta_omega : float
            Regularization parameter for unit weights (for re-estimation).
        zeta_lambda : float
            Regularization parameter for time weights (for re-estimation).
        min_decrease : float
            Convergence threshold passed to the weight solvers.
        replications : int, default=200
            Number of placebo replications.

        Returns
        -------
        tuple
            (se, placebo_effects). ``se`` is 0.0 with an empty array when
            there are not more controls than treated units, and 0.0 when
            fewer than two replications succeed.

        Warns
        -----
        UserWarning
            In either of the two degenerate cases above, or when more than
            5% of replications fail.
        """
        rng = np.random.default_rng(self.seed)
        _, n_control = Y_pre_control.shape

        # At least one pseudo-control must remain after carving out the
        # pseudo-treated group.
        n_pseudo_control = n_control - n_treated
        if n_pseudo_control < 1:
            warnings.warn(
                f"Not enough control units ({n_control}) for placebo variance "
                f"estimation with {n_treated} treated units. "
                f"Consider using variance_method='bootstrap'.",
                UserWarning,
                stacklevel=3,
            )
            return 0.0, np.array([])

        collected = []

        for _ in range(replications):
            try:
                # Step 1: shuffle the control columns.
                shuffled = rng.permutation(n_control)

                # Step 2: head of the shuffle -> pseudo-controls, tail ->
                # pseudo-treated.
                donor_cols = shuffled[:n_pseudo_control]
                fake_treated_cols = shuffled[n_pseudo_control:]

                donors_pre = Y_pre_control[:, donor_cols]
                donors_post = Y_post_control[:, donor_cols]
                fake_treated_pre = np.mean(
                    Y_pre_control[:, fake_treated_cols], axis=1
                )
                fake_treated_post = np.mean(
                    Y_post_control[:, fake_treated_cols], axis=1
                )

                # Step 3: weights are re-estimated on every split rather than
                # reused from the main fit (R passes update.omega=TRUE,
                # update.lambda=TRUE).
                omega_star = compute_sdid_unit_weights(
                    donors_pre,
                    fake_treated_pre,
                    zeta_omega=zeta_omega,
                    min_decrease=min_decrease,
                )
                lambda_star = compute_time_weights(
                    donors_pre,
                    donors_post,
                    zeta_lambda=zeta_lambda,
                    min_decrease=min_decrease,
                )

                # Step 4: placebo estimate for this split.
                tau = compute_sdid_estimator(
                    donors_pre,
                    donors_post,
                    fake_treated_pre,
                    fake_treated_post,
                    omega_star,
                    lambda_star
                )
                if np.isfinite(tau):
                    collected.append(tau)

            except (ValueError, LinAlgError, ZeroDivisionError):
                # A failed replication is dropped, not fatal.
                continue

        placebo_estimates = np.array(collected)
        n_ok = len(placebo_estimates)

        if n_ok < 2:
            warnings.warn(
                f"Only {n_ok} placebo replications completed successfully. "
                f"Standard error cannot be estimated reliably. "
                f"Consider using variance_method='bootstrap' or increasing "
                f"the number of control units.",
                UserWarning,
                stacklevel=3,
            )
            return 0.0, placebo_estimates

        # Flag runs where a noticeable share of replications was lost.
        failure_rate = 1 - (n_ok / replications)
        if failure_rate > 0.05:
            warnings.warn(
                f"Only {n_ok}/{replications} placebo replications succeeded "
                f"({failure_rate:.1%} failure rate). Standard errors may be unreliable.",
                UserWarning,
                stacklevel=3,
            )

        # R's formula: sqrt((r-1)/r) * sd(estimates).
        shrink = np.sqrt((n_ok - 1) / n_ok)
        se = shrink * np.std(placebo_estimates, ddof=1)

        return se, placebo_estimates


[docs]
    def get_params(self) -> Dict[str, Any]:
        """Get estimator parameters."""
        return {
            "zeta_omega": self.zeta_omega,
            "zeta_lambda": self.zeta_lambda,
            "alpha": self.alpha,
            "variance_method": self.variance_method,
            "n_bootstrap": self.n_bootstrap,
            "seed": self.seed,
        }



[docs]
    def set_params(self, **params) -> "SyntheticDiD":
        """Set estimator parameters."""
        # Deprecated parameter names — emit warning and ignore
        _deprecated = {"lambda_reg", "zeta"}
        for key, value in params.items():
            if key in _deprecated:
                warnings.warn(
                    f"{key} is deprecated and ignored. Use zeta_omega/zeta_lambda "
                    f"instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
            elif hasattr(self, key):
                setattr(self, key, value)
            else:
                raise ValueError(f"Unknown parameter: {key}")
        return self