Source code for diff_diff.wooldridge_results

Name: diff-diff
Author: diff-diff contributors
"""Results class for WooldridgeDiD (ETWFE) estimator."""

from __future__ import annotations

import warnings
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd

from diff_diff.utils import safe_inference



[docs]
@dataclass
class WooldridgeDiDResults:
    """Results from WooldridgeDiD.fit().

    Core output is ``group_time_effects``: a dict keyed by (cohort_g, time_t)
    with per-cell ATT estimates and inference. Call
    ``.aggregate(type, weights=...)`` to compute any of the four
    ``jwdid_estat`` aggregation types under either the default
    cell-count weighting (``weights="cell"``, matches Stata
    ``jwdid_estat``) or the paper W2025 opt-in cohort-share weighting
    (``weights="cohort_share"``, Eqs. 7.4 / 7.6; restricted to
    ``type ∈ {"simple", "event"}``). ``cohort_trend_coefs`` carries
    Section 8 / Eq. 8.1 estimated ``δ_g`` slopes when the fit was
    produced under ``WooldridgeDiD(cohort_trends=True)``.
    ``aggregation_weights`` is keyed by aggregation type and records
    the active weighting scheme that wrote to each cached surface
    (surfaced in ``summary()`` / ``to_dataframe()`` / ``__repr__``).
    """

    # ------------------------------------------------------------------ #
    # Core cohort×time estimates                                          #
    # ------------------------------------------------------------------ #
    group_time_effects: Dict[Tuple[Any, Any], Dict[str, Any]]
    """key=(g,t), value={att, se, t_stat, p_value, conf_int}"""

    # ------------------------------------------------------------------ #
    # Simple (overall) aggregation — always populated at fit time         #
    # ------------------------------------------------------------------ #
    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]

    # ------------------------------------------------------------------ #
    # Other aggregations — populated by .aggregate()                      #
    # ------------------------------------------------------------------ #
    group_effects: Optional[Dict[Any, Dict]] = field(default=None, repr=False)
    calendar_effects: Optional[Dict[Any, Dict]] = field(default=None, repr=False)
    event_study_effects: Optional[Dict[int, Dict]] = field(default=None, repr=False)

    # ------------------------------------------------------------------ #
    # Metadata                                                            #
    # ------------------------------------------------------------------ #
    method: str = "ols"
    control_group: str = "not_yet_treated"
    groups: List[Any] = field(default_factory=list)
    time_periods: List[Any] = field(default_factory=list)
    n_obs: int = 0
    n_treated_units: int = 0
    n_control_units: int = 0
    alpha: float = 0.05
    anticipation: int = 0
    survey_metadata: Optional[Any] = field(default=None, repr=False)

    # Variance-family metadata. ``vcov_type`` records the configured analytical
    # family ("classical", "hc1", "hc2", "hc2_bm", or "conley" — the conley
    # spatial-HAC path also populates ``conley_lag_cutoff``); when ``survey_design=``
    # is supplied the survey TSL (or replicate-weight refit) variance overrides
    # this — the field still records the configured value and
    # ``survey_metadata`` indicates the survey path was active. On bootstrap
    # fits (``n_bootstrap > 0``) the SE comes from the multiplier bootstrap,
    # not the analytical family. ``cluster_name`` / ``n_clusters`` are
    # populated when the fit was clustered (default unit cluster, or
    # user-set ``cluster=X``); both are ``None`` on explicit one-way
    # (``vcov_type in {"classical","hc2"}`` + no user cluster) fits where
    # the auto-cluster was dropped.
    vcov_type: str = "hc1"
    cluster_name: Optional[str] = None
    n_clusters: Optional[int] = None
    # Conley spatial-HAC within-unit Bartlett max lag (populated only when
    # ``vcov_type == "conley"``; ``None`` otherwise). Carries the configured
    # ``conley_lag_cutoff`` for the summary variance label.
    conley_lag_cutoff: Optional[int] = None

    # Heterogeneous cohort-specific linear trends (paper W2025 Section 8 /
    # Eq. 8.1). Keyed by treated cohort ``g`` → estimated slope ``δ_g``.
    # Empty dict when ``WooldridgeDiD`` was fit with ``cohort_trends=False``
    # (the default). Populated only via the OLS path; logit / poisson
    # reject ``cohort_trends=True`` at the constructor per paper Section 8
    # OLS-only scope.
    #
    # Identification + baseline normalization (paper W2025 Section 5.4):
    # the reported ``δ_g`` slopes are RELATIVE TO THE BASELINE TREND
    # absorbed by the design — the never-treated cohort's trend (when a
    # never-treated cohort exists) OR the last cohort's trend (when no
    # never-treated cohort exists, per the all-eventually-treated drop
    # rule). On all-treated panels the last cohort is intentionally
    # absent from the dict; its slope is the baseline (zero in deviation
    # form). See REGISTRY ``## WooldridgeDiD (ETWFE)`` → "Heterogeneous
    # cohort trends" Notes for the exact normalization contract.
    cohort_trend_coefs: Dict[Any, float] = field(default_factory=dict, repr=False)

    # Flag set by ``_fit_ols`` when ``n_bootstrap > 0`` AND the multiplier
    # bootstrap actually ran (i.e., produced at least one valid bootstrap
    # statistic). When True, ``aggregate(type="simple", weights="cell")``
    # is a no-op (preserves the bootstrap inference populated at fit time)
    # and ``aggregate(type="simple", weights="cohort_share")`` raises
    # because the cohort-share aggregation is not bootstrapped — re-fit
    # with ``n_bootstrap=0`` to use cohort-share + analytical inference,
    # or wait for the deferred bootstrap-cohort-share follow-up.
    _bootstrap_used: bool = field(default=False, repr=False)

    # Model-surface metadata for self-describing reporting.
    # ``cohort_trends`` records whether the fit was produced under the
    # Section 8 / Eq. 8.1 heterogeneous-cohort-trends design (paper
    # W2025 ``dg_i · t`` interactions on the OLS path). False on the
    # default ``cohort_trends=False`` fit and on logit/Poisson paths
    # (which reject ``cohort_trends=True`` at the constructor).
    #
    # ``aggregation_weights`` records the weighting scheme PER cached
    # aggregation surface so ``summary()`` / ``to_dataframe()`` /
    # ``__repr__()`` can label each surface correctly under mixed-order
    # ``aggregate(weights=...)`` calls. Keys: ``"simple"`` (matches the
    # ``overall_*`` fields), ``"group"``, ``"calendar"``, ``"event"``.
    # The fit-time ``overall_*`` is cell-weighted, so ``"simple"`` is
    # initialized to ``"cell"`` and only flips after a successful
    # ``aggregate(type="simple", weights="cohort_share")`` call. The
    # other keys are populated lazily by ``aggregate()``. Mutation is
    # atomic — only set after the aggregation passes all validation
    # AND completes successfully, so failed cohort_share calls on
    # survey-weighted or bootstrap fits leave metadata unchanged
    # (codex CI R7 P1 fix).
    cohort_trends: bool = field(default=False, repr=False)
    aggregation_weights: Dict[str, str] = field(
        default_factory=lambda: {"simple": "cell"}, repr=False
    )

    # ------------------------------------------------------------------ #
    # Internal — used by aggregate() for delta-method SEs                 #
    # ------------------------------------------------------------------ #
    _gt_weights: Dict[Tuple[Any, Any], int] = field(default_factory=dict, repr=False)
    _n_g_per_cohort: Dict[Any, int] = field(default_factory=dict, repr=False)
    """Unit count per treated cohort ``g`` (``N_g`` in paper Eqs. 7.4, 7.6).
    Populated at fit time from the analysis sample; used by
    ``aggregate(weights="cohort_share")`` (paper Section 7) to compute
    the simple-overall cohort-share weights ``ω̂_g`` and event-time
    weights ``ω̂_{ge}``. Empty dict on fits that pre-date the PR-B
    cohort-share surface (no information loss — ``weights="cell"`` is
    unaffected)."""
    _gt_vcov: Optional[np.ndarray] = field(default=None, repr=False)
    """Full vcov of all β_{g,t} coefficients (ordered same as sorted group_time_effects keys)."""
    _gt_keys: List[Tuple[Any, Any]] = field(default_factory=list, repr=False)
    """Ordered list of (g,t) keys corresponding to _gt_vcov columns."""
    _df_survey: Optional[int] = field(default=None, repr=False)
    """Survey degrees of freedom for t-distribution inference."""
    _bm_per_cell_dof: Dict[Tuple[Any, Any], float] = field(default_factory=dict, repr=False)
    """Per-cell Bell-McCaffrey Satterthwaite DOF (only populated for vcov_type='hc2_bm').
    Used by group_time_effects[(g, t)] inference fields at fit time."""
    _bm_artifacts: Optional[
        Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[Tuple[Any, Any], int]]
    ] = field(default=None, repr=False)
    """(X_red, cluster_ids, bread_red, coef_idx_map) for hc2_bm; enables
    lazy BM contrast-DOF computation in aggregate().

    ``X_red`` / ``bread_red`` are the REDUCED (kept-column) design and bread
    matrix produced by ``_fit_ols`` after rank-deficient column drops — the
    same subspace ``solve_ols`` returned non-NaN coefficients in.
    ``coef_idx_map`` maps each ``(g, t)`` cell present in
    ``group_time_effects`` to its column index in ``X_red``. Storing reduced
    artifacts avoids the singular full-design bread that
    ``_compute_cr2_bm_contrast_dof`` would otherwise reject."""
    _df_one_way: Optional[float] = field(default=None, repr=False)
    """Residual DOF (``n - rank(X)``) for one-way ``vcov_type in
    {"classical","hc2"}`` paths (full-dummy, no survey). ``aggregate()``
    uses this to thread R's ``lm()`` t-distribution into per-key
    inference. ``None`` on hc1 / hc2_bm / surveyed paths (which use BM
    DOF or ``_df_survey`` instead)."""

    # ------------------------------------------------------------------ #
    # Public methods                                                      #
    # ------------------------------------------------------------------ #


[docs]
    def aggregate(self, type: str, weights: str = "cell") -> "WooldridgeDiDResults":  # noqa: A002
        """Compute and store one of the four jwdid_estat aggregation types.

        Parameters
        ----------
        type : "simple" | "group" | "calendar" | "event"
            Aggregation to compute. ``"simple"`` overwrites the ``overall_*``
            fields; ``"group"`` / ``"calendar"`` / ``"event"`` write
            ``group_effects`` / ``calendar_effects`` / ``event_study_effects``.
        weights : "cell" | "cohort_share", default "cell"
            Aggregation weighting scheme. ``"cell"`` (default) uses cell-
            count ``n_{g,t}`` observation counts and matches Stata
            ``jwdid_estat``. ``"cohort_share"`` uses paper W2025 Eq. 7.4
            ``ω̂_g = N_g / Σ_{g'} N_{g'} M_{g'}`` for ``type="simple"`` and
            Eq. 7.6 ``ω̂_{ge} = N_g / Σ_{g': g'+e ≤ T} N_{g'}`` for
            ``type="event"``. Both formulas reduce to ``N_g``-proportional
            per-cell weights with the appropriate normalization. The two
            schemes coincide on balanced panels with uniform within-cohort
            cell counts (paper Section 7.5). The cohort-share scheme is
            supported only for ``type="simple"`` and ``type="event"``; the
            paper provides no explicit cohort-share formula for ``"group"``
            or ``"calendar"`` aggregations and the library raises
            ``ValueError`` to preserve a fail-closed contract.

        Returns
        -------
        WooldridgeDiDResults
            ``self``, for chaining.

        Raises
        ------
        ValueError
            If ``type`` or ``weights`` is not a recognised value; if
            ``weights="cohort_share"`` is combined with ``type`` in
            ``{"group", "calendar"}``, with an empty ``_n_g_per_cohort``,
            with a survey-weighted fit (``survey_metadata`` is not None), or
            with ``type="simple"`` on a bootstrapped fit.

        Warns
        -----
        UserWarning
            On every *accepted* ``weights="cohort_share"`` call (inference
            fields are nulled to NaN), and when the Bell-McCaffrey contrast
            DOF helper raises under ``vcov_type="hc2_bm"``. All validation
            runs before the cohort-share warning, so a rejected call raises
            ``ValueError`` without first warning about results it will not
            produce.

        Notes
        -----
        When ``vcov_type == "hc2_bm"``, aggregated inference (t_stat / p_value /
        conf_int) uses Bell-McCaffrey Satterthwaite contrast-specific DOFs
        rather than the survey/None default. The BM DOFs are computed lazily
        from ``_bm_artifacts`` via ``_compute_cr2_bm_contrast_dof`` and
        fail-closed (NaN inference) when the helper raises or returns NaN —
        per ``feedback_bm_contrast_dof_fail_closed``. The contrast column
        is rebuilt under the active ``weights`` scheme so the BM DOF
        reflects the actual weighting used by ATT + SE.
        """
        # ------------------------------------------------------------- #
        # Validation — everything that can reject the call runs first,   #
        # before any warning is emitted or any state is mutated.         #
        # ------------------------------------------------------------- #
        valid = ("simple", "group", "calendar", "event")
        if type not in valid:
            raise ValueError(f"type must be one of {valid}, got {type!r}")

        valid_weights = ("cell", "cohort_share")
        if weights not in valid_weights:
            raise ValueError(f"weights must be one of {valid_weights}, got {weights!r}")
        if weights == "cohort_share" and type in ("group", "calendar"):
            raise ValueError(
                f"weights='cohort_share' is only supported for type='simple' "
                f"(paper W2025 Eq. 7.4) and type='event' (paper W2025 Eq. 7.6). "
                f"type={type!r} has no explicit paper closed-form cohort-share "
                f"weighting; use weights='cell' (default) for "
                f"jwdid_estat-style cell-count weighting."
            )

        gt = self.group_time_effects
        cell_weights = self._gt_weights
        n_g_per_cohort = self._n_g_per_cohort
        vcov = self._gt_vcov
        keys_ordered = self._gt_keys if self._gt_keys else sorted(gt.keys())

        # Cohort-share scheme requires populated _n_g_per_cohort; raise an
        # informative error rather than silently returning zero-weighted
        # NaN aggregates.
        if weights == "cohort_share" and not n_g_per_cohort:
            raise ValueError(
                "weights='cohort_share' requires per-cohort unit counts "
                "(_n_g_per_cohort) populated at fit time; this Results "
                "object has none. Re-fit with the current WooldridgeDiD "
                "version, or use weights='cell' (default) on legacy fits."
            )

        # Survey + cohort_share composition is not yet supported:
        # ``_n_g_per_cohort`` holds raw unit counts, so composing
        # design-weighted ATT estimates with unweighted cohort shares would
        # target a mixed estimand. Fail closed.
        if weights == "cohort_share" and self.survey_metadata is not None:
            raise ValueError(
                "aggregate(weights='cohort_share') is not yet supported on "
                "survey-weighted fits (survey_design is not None): the "
                "cohort-share weights would compose design-weighted ATTs "
                "with unweighted cohort shares, targeting a mixed estimand "
                "that is not paper W2025 Section 7's design-population "
                "cohort-share form. Design-consistent cohort totals are "
                "deferred to a follow-up; use weights='cell' (default) "
                "on survey-weighted fits."
            )

        # Bootstrap interaction guard: when ``_bootstrap_used`` is set the
        # ``overall_*`` fields hold bootstrap inference from fit time. The
        # default ``weights="cell"`` path is a no-op (preserves that
        # inference); the ``weights="cohort_share"`` path has no bootstrap
        # variant and is rejected. This check sits BEFORE the cohort-share
        # warning below so that a rejected call surfaces as ValueError and
        # never as a warning (which becomes the raised exception under
        # ``-W error``).
        if type == "simple" and self._bootstrap_used:
            if weights == "cell":
                return self
            raise ValueError(
                "aggregate(type='simple', weights='cohort_share') is "
                "not supported on bootstrapped fits "
                "(n_bootstrap > 0): the multiplier bootstrap was run "
                "on the cell-count-weighted overall ATT at fit time, "
                "and the cohort-share aggregation has no matching "
                "bootstrap variant yet. Re-fit with n_bootstrap=0 to "
                "use cohort-share + analytical inference."
            )

        # Cohort-share variance is conditional on the realized shares
        # (paper W2025 Section 7.5), so the SE understates unconditional
        # uncertainty. The point estimate and conditional SE are still
        # returned, but t-stat / p-value / conf-int are nulled to NaN and a
        # UserWarning explains the contract.
        cohort_share_inference_fail_closed = weights == "cohort_share"
        if cohort_share_inference_fail_closed:
            warnings.warn(
                "weights='cohort_share' aggregation: the analytical SE and "
                "inference (t-stat / p-value / conf-int) computed by "
                "WooldridgeDiDResults.aggregate(..., weights='cohort_share') "
                "treat the cohort-share weights ω̂_g / ω̂_{ge} as fixed; "
                "this conditional-on-shares variance understates the "
                "unconditional sampling uncertainty per paper W2025 "
                "Section 7.5. The library fail-closes the t-stat / p-value "
                "/ conf-int fields to NaN until proper APE/GMM-style "
                "aggregate inference is derived (tracked in TODO). The "
                "POINT estimate and conditional-on-shares SE are computed "
                "and returned for reference; use weights='cell' (default) "
                "for the analytical aggregation with full inference.",
                UserWarning,
                stacklevel=2,
            )

        # ------------------------------------------------------------- #
        # Local helpers                                                  #
        # ------------------------------------------------------------- #

        # Un-normalized weight of one (g, t) cell under the active scheme.
        # Callers normalize by the per-key total, so only relative
        # magnitudes matter. Under cohort_share the per-cell weight is
        # ``N_g`` for both simple and event aggregations; the per-key
        # normalization differs only because the cell sets differ.
        def _cell_weight(c: Tuple[Any, Any]) -> float:
            if weights == "cell":
                return float(cell_weights.get(c, 0))
            # cohort_share
            return float(n_g_per_cohort.get(c[0], 0))

        def _agg_se(w_vec: np.ndarray) -> float:
            """Delta-method SE for a linear combination w'β given full vcov."""
            if vcov is None or len(w_vec) != vcov.shape[0]:
                return float("nan")
            # Clamp tiny negative quadratic forms (round-off) to zero.
            return float(np.sqrt(max(w_vec @ vcov @ w_vec, 0.0)))

        # Compute BM contrast DOFs lazily for hc2_bm. ``cells_by_key`` maps
        # aggregation_key -> list of (g, t) cells; the contrast for each key
        # places the normalized per-cell weight at that cell's column in the
        # reduced design. Returns aggregation_key -> df (NaN on fail-closed),
        # or an empty dict for non-hc2_bm fits (caller then falls back to
        # the non-BM DOF rules in ``_build_effect``).
        # NOTE: this helper must be called directly from the ``aggregate``
        # body — its warning uses ``stacklevel=3`` to point at the caller of
        # ``aggregate``.
        def _bm_contrast_dofs_for(
            cells_by_key: Dict[Any, List[Tuple[Any, Any]]],
        ) -> Dict[Any, float]:
            if self.vcov_type != "hc2_bm" or self._bm_artifacts is None:
                return {}
            # ``X_red`` / ``bread_red`` are the REDUCED kept-column artifacts
            # (post rank-deficient drops); ``coef_idx_map`` maps (g, t) →
            # column index in ``X_red``.
            X_red, cluster_ids_full, bread_red, coef_idx_map = self._bm_artifacts
            n_red = X_red.shape[1]
            contrast_cols: List[np.ndarray] = []
            agg_keys: List[Any] = []
            for agg_key, cells in cells_by_key.items():
                if not cells:
                    continue
                w_total = sum(_cell_weight(c) for c in cells)
                if w_total == 0:
                    continue
                col = np.zeros(n_red)
                contributed = False
                for c in cells:
                    if c not in coef_idx_map:
                        continue
                    col[coef_idx_map[c]] = _cell_weight(c) / w_total
                    contributed = True
                if not contributed:
                    continue
                contrast_cols.append(col)
                agg_keys.append(agg_key)
            if not contrast_cols:
                return {k: float("nan") for k in cells_by_key}
            from diff_diff.linalg import _compute_cr2_bm_contrast_dof

            contrasts_matrix = np.column_stack(contrast_cols)
            dof_map: Dict[Any, float] = {}
            try:
                dof_vec = _compute_cr2_bm_contrast_dof(
                    X_red, cluster_ids_full, bread_red, contrasts_matrix
                )
                for i, k in enumerate(agg_keys):
                    candidate = float(dof_vec[i])
                    dof_map[k] = candidate if np.isfinite(candidate) else float("nan")
            except (ValueError, np.linalg.LinAlgError) as exc:
                warnings.warn(
                    f"WooldridgeDiDResults.aggregate({type!r}) could not "
                    f"compute Bell-McCaffrey contrast DOF "
                    f"({exc.__class__.__name__}: {exc}). "
                    "Affected aggregated inference (t_stat / p_value / "
                    "conf_int) will be NaN to preserve the hc2_bm contract.",
                    UserWarning,
                    stacklevel=3,
                )
                for k in agg_keys:
                    dof_map[k] = float("nan")
            # Fill non-computed keys with NaN to fail-closed.
            for k in cells_by_key:
                dof_map.setdefault(k, float("nan"))
            return dof_map

        def _build_effect(
            att: float, se: float, df_for_inference: Optional[float]
        ) -> Dict[str, Any]:
            """Build an effect dict using ``df_for_inference`` for the t-distribution.

            Under ``weights="cohort_share"`` the inference fields are NaN
            (point estimate and conditional-on-shares SE are kept). Under
            ``vcov_type == "hc2_bm"`` the BM contrast DOF is used and a
            missing / non-finite DOF yields NaN inference. For
            ``classical`` / ``hc2`` with a finite ``self._df_one_way`` that
            residual DOF is used; otherwise ``self._df_survey`` is passed
            through to ``safe_inference``.
            """
            nan_inference = {
                "att": att,
                "se": se,
                "t_stat": float("nan"),
                "p_value": float("nan"),
                "conf_int": (float("nan"), float("nan")),
            }
            if cohort_share_inference_fail_closed:
                return nan_inference
            if self.vcov_type == "hc2_bm":
                if df_for_inference is None or not np.isfinite(df_for_inference):
                    return nan_inference
                t_stat, p_value, conf_int = safe_inference(
                    att, se, alpha=self.alpha, df=df_for_inference
                )
            elif (
                self.vcov_type in ("classical", "hc2")
                and self._df_one_way is not None
                and np.isfinite(self._df_one_way)
            ):
                t_stat, p_value, conf_int = safe_inference(
                    att, se, alpha=self.alpha, df=self._df_one_way
                )
            else:
                t_stat, p_value, conf_int = safe_inference(
                    att, se, alpha=self.alpha, df=self._df_survey
                )
            return {
                "att": att,
                "se": se,
                "t_stat": t_stat,
                "p_value": p_value,
                "conf_int": conf_int,
            }

        def _weighted_effect(
            cells: List[Tuple[Any, Any]],
            w_total: float,
            df_for_inference: Optional[float],
        ) -> Dict[str, Any]:
            """Weighted-average ATT, delta-method SE and inference for ``cells``."""
            # Set lookup keeps the weight-vector build linear in the number
            # of cells instead of scanning the ``cells`` list per key.
            cell_set = set(cells)
            att = sum(_cell_weight(c) * gt[c]["att"] for c in cells) / w_total
            w_vec = np.array(
                [_cell_weight(c) / w_total if c in cell_set else 0.0 for c in keys_ordered]
            )
            return _build_effect(att, _agg_se(w_vec), df_for_inference)

        def _effects_by_key(
            cells_by_key: Dict[Any, List[Tuple[Any, Any]]],
            dofs: Dict[Any, float],
        ) -> Dict[Any, Dict]:
            """One effect dict per aggregation key; keys with no cells or zero weight are dropped."""
            effects: Dict[Any, Dict] = {}
            for agg_key, cells in cells_by_key.items():
                if not cells:
                    continue
                w_total = sum(_cell_weight(c) for c in cells)
                if w_total == 0:
                    continue
                effects[agg_key] = _weighted_effect(cells, w_total, dofs.get(agg_key))
            return effects

        # ------------------------------------------------------------- #
        # Aggregation                                                    #
        # ------------------------------------------------------------- #
        if type == "simple":
            # Recompute overall ATT + SE under the active weighting scheme
            # from the post-treatment cells (t >= g) of treated cohorts.
            cells_simple = [(g, t) for (g, t) in keys_ordered if g > 0 and t >= g]
            cells_by_simple: Dict[Any, List[Tuple[Any, Any]]] = {"simple": cells_simple}
            dofs = _bm_contrast_dofs_for(cells_by_simple)
            if cells_simple:
                w_total = sum(_cell_weight(c) for c in cells_simple)
                if w_total > 0:
                    eff = _weighted_effect(cells_simple, w_total, dofs.get("simple"))
                    self.overall_att = eff["att"]
                    self.overall_se = eff["se"]
                    self.overall_t_stat = eff["t_stat"]
                    self.overall_p_value = eff["p_value"]
                    self.overall_conf_int = eff["conf_int"]
                    # Atomic metadata mutation — only after successful write.
                    self.aggregation_weights["simple"] = weights

        elif type == "group":
            cells_by_g: Dict[Any, List[Tuple[Any, Any]]] = {
                g: [(g2, t) for (g2, t) in keys_ordered if g2 == g and t >= g]
                for g in self.groups
            }
            dofs = _bm_contrast_dofs_for(cells_by_g)
            self.group_effects = _effects_by_key(cells_by_g, dofs)
            self.aggregation_weights["group"] = weights

        elif type == "calendar":
            cells_by_t: Dict[Any, List[Tuple[Any, Any]]] = {
                t: [(g, t2) for (g, t2) in keys_ordered if t2 == t and t >= g]
                for t in self.time_periods
            }
            dofs = _bm_contrast_dofs_for(cells_by_t)
            self.calendar_effects = _effects_by_key(cells_by_t, dofs)
            self.aggregation_weights["calendar"] = weights

        elif type == "event":
            # Paper W2025 Eq. 7.6 cohort-share-by-exposure weighting is
            # defined for post-treatment exposure times (k >= 0) only, so
            # under ``weights="cohort_share"`` negative-lead cells are
            # excluded. Under ``weights="cell"`` the full event range is
            # preserved for backward compatibility.
            if weights == "cohort_share":
                eligible_pairs = [(g, t) for (g, t) in keys_ordered if t - g >= 0]
            else:
                eligible_pairs = list(keys_ordered)
            all_k = sorted({t - g for (g, t) in eligible_pairs})
            cells_by_k: Dict[int, List[Tuple[Any, Any]]] = {
                k: [(g, t) for (g, t) in eligible_pairs if t - g == k] for k in all_k
            }
            dofs = _bm_contrast_dofs_for(cells_by_k)
            self.event_study_effects = _effects_by_key(cells_by_k, dofs)
            self.aggregation_weights["event"] = weights

        return self



[docs]
    def summary(self, aggregation: str = "simple") -> str:
        """Print formatted summary table.

        Parameters
        ----------
        aggregation : which aggregation to display ("simple", "group", "calendar", "event")
        """
        lines = [
            "=" * 70,
            "    Wooldridge Extended Two-Way Fixed Effects (ETWFE) Results",
            "=" * 70,
            f"Method:          {self.method}",
            f"Control group:   {self.control_group}",
            f"Observations:    {self.n_obs}",
            f"Treated units:   {self.n_treated_units}",
            f"Control units:   {self.n_control_units}",
            f"Cohort trends:   {self.cohort_trends}",
            f"Aggregation w:   {self.aggregation_weights.get(aggregation, 'cell')}",
            "-" * 70,
        ]

        if self.survey_metadata is not None:
            from diff_diff.results import _format_survey_block

            lines.extend(_format_survey_block(self.survey_metadata, 70))
            lines.append("-" * 70)

        # Conley spatial-HAC variance label (rendered only on the conley path;
        # a full vcov-family label for all families is a separate follow-up).
        if self.vcov_type == "conley":
            from diff_diff.results import _format_vcov_label

            _vlabel = _format_vcov_label(
                self.vcov_type,
                cluster_name=self.cluster_name,
                n_clusters=self.n_clusters,
                n_obs=self.n_obs,
                conley_lag_cutoff=self.conley_lag_cutoff,
            )
            if _vlabel:
                lines.append(f"Std. errors:     {_vlabel}")
                lines.append("-" * 70)

        def _fmt_row(label: str, att: float, se: float, t: float, p: float, ci: Tuple) -> str:
            from diff_diff.results import _get_significance_stars

            stars = _get_significance_stars(p) if not np.isnan(p) else ""
            ci_lo = f"{ci[0]:.4f}" if not np.isnan(ci[0]) else "NaN"
            ci_hi = f"{ci[1]:.4f}" if not np.isnan(ci[1]) else "NaN"
            return (
                f"{label:<22} {att:>10.4f}  {se:>10.4f}  {t:>8.3f}  "
                f"{p:>8.4f}{stars}   [{ci_lo}, {ci_hi}]"
            )

        ci_pct = f"{(1 - self.alpha) * 100:.0f}%"
        header = (
            f"{'Parameter':<22} {'Estimate':>10}  {'Std. Err.':>10}  "
            f"{'t-stat':>8}  {'P>|t|':>8}   [{ci_pct} CI]"
        )
        lines.append(header)
        lines.append("-" * 70)

        if aggregation == "simple":
            lines.append(
                _fmt_row(
                    "ATT (simple)",
                    self.overall_att,
                    self.overall_se,
                    self.overall_t_stat,
                    self.overall_p_value,
                    self.overall_conf_int,
                )
            )
        elif aggregation == "group" and self.group_effects:
            for g, eff in sorted(self.group_effects.items()):
                lines.append(
                    _fmt_row(
                        f"ATT(g={g})",
                        eff["att"],
                        eff["se"],
                        eff["t_stat"],
                        eff["p_value"],
                        eff["conf_int"],
                    )
                )
        elif aggregation == "calendar" and self.calendar_effects:
            for t, eff in sorted(self.calendar_effects.items()):
                lines.append(
                    _fmt_row(
                        f"ATT(t={t})",
                        eff["att"],
                        eff["se"],
                        eff["t_stat"],
                        eff["p_value"],
                        eff["conf_int"],
                    )
                )
        elif aggregation == "event" and self.event_study_effects:
            for k, eff in sorted(self.event_study_effects.items()):
                if k < -self.anticipation:
                    suffix = " [pre]"
                elif k < 0:
                    suffix = " [antic]"
                else:
                    suffix = ""
                label = f"ATT(k={k})" + suffix
                lines.append(
                    _fmt_row(
                        label,
                        eff["att"],
                        eff["se"],
                        eff["t_stat"],
                        eff["p_value"],
                        eff["conf_int"],
                    )
                )
        else:
            lines.append(f"  (call .aggregate({aggregation!r}) first)")

        lines.append("=" * 70)
        return "\n".join(lines)



[docs]
    def to_dataframe(self, aggregation: str = "event") -> pd.DataFrame:
        """Export aggregated effects to a DataFrame.

        Parameters
        ----------
        aggregation : "simple" | "group" | "calendar" | "event" | "gt"
            Use "gt" to export raw group-time effects.
        """
        if aggregation == "gt":
            rows = []
            for (g, t), eff in sorted(self.group_time_effects.items()):
                row = {"cohort": g, "time": t, "relative_period": t - g}
                row.update(eff)
                rows.append(row)
            return pd.DataFrame(rows)

        # Active weighting scheme for the requested aggregation surface
        # (default "cell"; flips to "cohort_share" after an opt-in
        # cohort-share aggregation on that surface). Stamped on every
        # exported row so downstream consumers can tell which estimand
        # the row represents without having to inspect the originating
        # Results object.
        active_weights = self.aggregation_weights.get(aggregation, "cell")
        mapping = {
            "simple": [
                {
                    "label": "ATT",
                    "att": self.overall_att,
                    "se": self.overall_se,
                    "t_stat": self.overall_t_stat,
                    "p_value": self.overall_p_value,
                    "conf_int_lo": self.overall_conf_int[0],
                    "conf_int_hi": self.overall_conf_int[1],
                    "cohort_trends": self.cohort_trends,
                    "aggregation_weights": active_weights,
                }
            ],
            "group": [
                {
                    "cohort": g,
                    **{k: v for k, v in eff.items() if k != "conf_int"},
                    "conf_int_lo": eff["conf_int"][0],
                    "conf_int_hi": eff["conf_int"][1],
                    "cohort_trends": self.cohort_trends,
                    "aggregation_weights": active_weights,
                }
                for g, eff in sorted((self.group_effects or {}).items())
            ],
            "calendar": [
                {
                    "time": t,
                    **{k: v for k, v in eff.items() if k != "conf_int"},
                    "conf_int_lo": eff["conf_int"][0],
                    "conf_int_hi": eff["conf_int"][1],
                    "cohort_trends": self.cohort_trends,
                    "aggregation_weights": active_weights,
                }
                for t, eff in sorted((self.calendar_effects or {}).items())
            ],
            "event": [
                {
                    "relative_period": k,
                    **{kk: vv for kk, vv in eff.items() if kk != "conf_int"},
                    "conf_int_lo": eff["conf_int"][0],
                    "conf_int_hi": eff["conf_int"][1],
                    "cohort_trends": self.cohort_trends,
                    "aggregation_weights": active_weights,
                }
                for k, eff in sorted((self.event_study_effects or {}).items())
            ],
        }
        rows = mapping.get(aggregation, [])
        return pd.DataFrame(rows)



[docs]
    def plot_event_study(self, weights: str = "cell", **kwargs) -> None:
        """Draw the event-study plot, re-aggregating first.

        ``aggregate('event', weights=weights)`` is invoked on every call
        before anything is plotted.

        Parameters
        ----------
        weights : "cell" | "cohort_share", default "cell"
            Weighting scheme passed through to ``aggregate("event", ...)``.
            With ``"cohort_share"`` (paper W2025 Eq. 7.6
            cohort-share-by-exposure weights, post-treatment ``k >= 0``
            only) the inference fields are fail-closed to NaN per the
            Section 7.5 conditional-on-shares contract documented in
            REGISTRY, and this wrapper hands NaN standard errors to the
            plotter so that **error bars / CI bands are suppressed**
            rather than drawn from a misleading normal-theory CI.
        **kwargs
            Forwarded to ``diff_diff.visualization.plot_event_study``.

        Notes
        -----
        Re-aggregating unconditionally guards against a stale cache: after
        ``plot_event_study(weights="cohort_share")`` the cached
        ``event_study_effects`` only covers ``k >= 0`` (the Eq. 7.6
        scope), and a later default ``plot_event_study()`` call would
        otherwise silently reuse that restricted cache instead of
        restoring the full event range with its pre-period placebo leads.
        """
        # aggregate() replaces ``event_study_effects`` in place, so a fresh
        # aggregation per call rules out any cross-scheme stale-cache reuse.
        self.aggregate("event", weights=weights)

        from diff_diff.visualization import plot_event_study as _draw_event_study

        cached = self.event_study_effects or {}
        point_estimates = {k: eff["att"] for k, eff in cached.items()}

        if weights != "cohort_share":
            std_errors = {k: eff["se"] for k, eff in cached.items()}
        else:
            # Fail-closed inference contract (paper Section 7.5): the
            # conditional-on-shares SE understates unconditional
            # uncertainty, and a finite SE would let the plotter render a
            # normal-theory CI that contradicts the NaN inference fields
            # produced by aggregate(). NaN SEs make the plotter drop the
            # error bars / CI bands. Locked by
            # ``test_plot_event_study_cohort_share_suppresses_error_bars``.
            std_errors = {k: float("nan") for k in cached}

        _draw_event_study(
            effects=point_estimates, se=std_errors, alpha=self.alpha, **kwargs
        )


    # --- Inference-field aliases (balance/external-adapter compatibility) ---
    @property
    def att(self) -> float:
        """Alias for ``overall_att``, the overall (simple-aggregation) ATT."""
        return self.overall_att

    @property
    def se(self) -> float:
        """Alias for ``overall_se``, the standard error of the overall ATT."""
        return self.overall_se

    @property
    def conf_int(self) -> Tuple[float, float]:
        """Alias for ``overall_conf_int``, the overall ATT confidence interval."""
        return self.overall_conf_int

    @property
    def p_value(self) -> float:
        """Alias for ``overall_p_value``, the p-value of the overall ATT."""
        return self.overall_p_value

    @property
    def t_stat(self) -> float:
        """Alias for ``overall_t_stat``, the t-statistic of the overall ATT."""
        return self.overall_t_stat

    def __repr__(self) -> str:
        n_gt = len(self.group_time_effects)
        att_str = f"{self.overall_att:.4f}" if not np.isnan(self.overall_att) else "NaN"
        se_str = f"{self.overall_se:.4f}" if not np.isnan(self.overall_se) else "NaN"
        p_str = f"{self.overall_p_value:.4f}" if not np.isnan(self.overall_p_value) else "NaN"
        # Surface the active simple aggregation scheme (the one that
        # produced the printed ``overall_*`` values) + cohort_trends
        # flag so repr is self-describing for downstream consumers.
        simple_weights = self.aggregation_weights.get("simple", "cell")
        return (
            f"WooldridgeDiDResults("
            f"ATT={att_str}, SE={se_str}, p={p_str}, "
            f"n_gt={n_gt}, method={self.method!r}, "
            f"cohort_trends={self.cohort_trends}, "
            f"aggregation_weights={simple_weights!r})"
        )