"""Results class for WooldridgeDiD (ETWFE) estimator."""
from __future__ import annotations
import warnings
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
from diff_diff.utils import safe_inference
@dataclass
class WooldridgeDiDResults:
    """Results from WooldridgeDiD.fit().

    Core output is ``group_time_effects``: a dict keyed by (cohort_g, time_t)
    with per-cell ATT estimates and inference. Call
    ``.aggregate(type, weights=...)`` to compute any of the four
    ``jwdid_estat`` aggregation types under either the default
    cell-count weighting (``weights="cell"``, matches Stata
    ``jwdid_estat``) or the paper W2025 opt-in cohort-share weighting
    (``weights="cohort_share"``, Eqs. 7.4 / 7.6; restricted to
    ``type ∈ {"simple", "event"}``). ``cohort_trend_coefs`` carries
    Section 8 / Eq. 8.1 estimated ``δ_g`` slopes when the fit was
    produced under ``WooldridgeDiD(cohort_trends=True)``.
    ``aggregation_weights`` is keyed by aggregation type and records
    the active weighting scheme that wrote to each cached surface
    (surfaced in ``summary()`` / ``to_dataframe()`` / ``__repr__``).
    """

    # ------------------------------------------------------------------ #
    #  Core cohort×time estimates                                          #
    # ------------------------------------------------------------------ #
    group_time_effects: Dict[Tuple[Any, Any], Dict[str, Any]]
    """key=(g,t), value={att, se, t_stat, p_value, conf_int}"""

    # ------------------------------------------------------------------ #
    #  Simple (overall) aggregation — always populated at fit time         #
    # ------------------------------------------------------------------ #
    overall_att: float
    overall_se: float
    overall_t_stat: float
    overall_p_value: float
    overall_conf_int: Tuple[float, float]

    # ------------------------------------------------------------------ #
    #  Other aggregations — populated by .aggregate()                      #
    # ------------------------------------------------------------------ #
    group_effects: Optional[Dict[Any, Dict]] = field(default=None, repr=False)
    calendar_effects: Optional[Dict[Any, Dict]] = field(default=None, repr=False)
    event_study_effects: Optional[Dict[int, Dict]] = field(default=None, repr=False)

    # ------------------------------------------------------------------ #
    #  Metadata                                                            #
    # ------------------------------------------------------------------ #
    method: str = "ols"
    control_group: str = "not_yet_treated"
    groups: List[Any] = field(default_factory=list)
    time_periods: List[Any] = field(default_factory=list)
    n_obs: int = 0
    n_treated_units: int = 0
    n_control_units: int = 0
    alpha: float = 0.05
    anticipation: int = 0
    survey_metadata: Optional[Any] = field(default=None, repr=False)

    # Variance-family metadata. ``vcov_type`` records the configured analytical
    # family ("classical", "hc1", "hc2", "hc2_bm", or "conley" — the conley
    # spatial-HAC path also populates ``conley_lag_cutoff``); when ``survey_design=``
    # is supplied the survey TSL (or replicate-weight refit) variance overrides
    # this — the field still records the configured value and
    # ``survey_metadata`` indicates the survey path was active. On bootstrap
    # fits (``n_bootstrap > 0``) the SE comes from the multiplier bootstrap,
    # not the analytical family. ``cluster_name`` / ``n_clusters`` are
    # populated when the fit was clustered (default unit cluster, or
    # user-set ``cluster=X``); both are ``None`` on explicit one-way
    # (``vcov_type in {"classical","hc2"}`` + no user cluster) fits where
    # the auto-cluster was dropped.
    vcov_type: str = "hc1"
    cluster_name: Optional[str] = None
    n_clusters: Optional[int] = None

    # Conley spatial-HAC within-unit Bartlett max lag (populated only when
    # ``vcov_type == "conley"``; ``None`` otherwise). Carries the configured
    # ``conley_lag_cutoff`` for the summary variance label.
    conley_lag_cutoff: Optional[int] = None

    # Heterogeneous cohort-specific linear trends (paper W2025 Section 8 /
    # Eq. 8.1). Keyed by treated cohort ``g`` → estimated slope ``δ_g``.
    # Empty dict when ``WooldridgeDiD`` was fit with ``cohort_trends=False``
    # (the default). Populated only via the OLS path; logit / poisson
    # reject ``cohort_trends=True`` at the constructor per paper Section 8
    # OLS-only scope.
    #
    # Identification + baseline normalization (paper W2025 Section 5.4):
    # the reported ``δ_g`` slopes are RELATIVE TO THE BASELINE TREND
    # absorbed by the design — the never-treated cohort's trend (when a
    # never-treated cohort exists) OR the last cohort's trend (when no
    # never-treated cohort exists, per the all-eventually-treated drop
    # rule). On all-treated panels the last cohort is intentionally
    # absent from the dict; its slope is the baseline (zero in deviation
    # form). See REGISTRY ``## WooldridgeDiD (ETWFE)`` → "Heterogeneous
    # cohort trends" Notes for the exact normalization contract.
    cohort_trend_coefs: Dict[Any, float] = field(default_factory=dict, repr=False)

    # Flag set by ``_fit_ols`` when ``n_bootstrap > 0`` AND the multiplier
    # bootstrap actually ran (i.e., produced at least one valid bootstrap
    # statistic). When True, ``aggregate(type="simple", weights="cell")``
    # is a no-op (preserves the bootstrap inference populated at fit time)
    # and ``aggregate(type="simple", weights="cohort_share")`` raises
    # because the cohort-share aggregation is not bootstrapped — re-fit
    # with ``n_bootstrap=0`` to use cohort-share + analytical inference,
    # or wait for the deferred bootstrap-cohort-share follow-up.
    _bootstrap_used: bool = field(default=False, repr=False)

    # Model-surface metadata for self-describing reporting.
    # ``cohort_trends`` records whether the fit was produced under the
    # Section 8 / Eq. 8.1 heterogeneous-cohort-trends design (paper
    # W2025 ``dg_i · t`` interactions on the OLS path). False on the
    # default ``cohort_trends=False`` fit and on logit/Poisson paths
    # (which reject ``cohort_trends=True`` at the constructor).
    #
    # ``aggregation_weights`` records the weighting scheme PER cached
    # aggregation surface so ``summary()`` / ``to_dataframe()`` /
    # ``__repr__()`` can label each surface correctly under mixed-order
    # ``aggregate(weights=...)`` calls. Keys: ``"simple"`` (matches the
    # ``overall_*`` fields), ``"group"``, ``"calendar"``, ``"event"``.
    # The fit-time ``overall_*`` is cell-weighted, so ``"simple"`` is
    # initialized to ``"cell"`` and only flips after a successful
    # ``aggregate(type="simple", weights="cohort_share")`` call. The
    # other keys are populated lazily by ``aggregate()``. Mutation is
    # atomic — only set after the aggregation passes all validation
    # AND completes successfully, so failed cohort_share calls on
    # survey-weighted or bootstrap fits leave metadata unchanged
    # (codex CI R7 P1 fix).
    cohort_trends: bool = field(default=False, repr=False)
    aggregation_weights: Dict[str, str] = field(
        default_factory=lambda: {"simple": "cell"}, repr=False
    )

    # ------------------------------------------------------------------ #
    #  Internal — used by aggregate() for delta-method SEs                 #
    # ------------------------------------------------------------------ #
    _gt_weights: Dict[Tuple[Any, Any], int] = field(default_factory=dict, repr=False)
    _n_g_per_cohort: Dict[Any, int] = field(default_factory=dict, repr=False)
    """Unit count per treated cohort ``g`` (``N_g`` in paper Eqs. 7.4, 7.6).
    Populated at fit time from the analysis sample; used by
    ``aggregate(weights="cohort_share")`` (paper Section 7) to compute
    the simple-overall cohort-share weights ``ω̂_g`` and event-time
    weights ``ω̂_{ge}``. Empty dict on fits that pre-date the PR-B
    cohort-share surface (no information loss — ``weights="cell"`` is
    unaffected)."""
    _gt_vcov: Optional[np.ndarray] = field(default=None, repr=False)
    """Full vcov of all β_{g,t} coefficients (ordered same as sorted group_time_effects keys)."""
    _gt_keys: List[Tuple[Any, Any]] = field(default_factory=list, repr=False)
    """Ordered list of (g,t) keys corresponding to _gt_vcov columns."""
    _df_survey: Optional[int] = field(default=None, repr=False)
    """Survey degrees of freedom for t-distribution inference."""
    _bm_per_cell_dof: Dict[Tuple[Any, Any], float] = field(default_factory=dict, repr=False)
    """Per-cell Bell-McCaffrey Satterthwaite DOF (only populated for vcov_type='hc2_bm').
    Used by group_time_effects[(g, t)] inference fields at fit time."""
    _bm_artifacts: Optional[
        Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[Tuple[Any, Any], int]]
    ] = field(default=None, repr=False)
    """(X_red, cluster_ids, bread_red, coef_idx_map) for hc2_bm; enables
    lazy BM contrast-DOF computation in aggregate().
    ``X_red`` / ``bread_red`` are the REDUCED (kept-column) design and bread
    matrix produced by ``_fit_ols`` after rank-deficient column drops — the
    same subspace ``solve_ols`` returned non-NaN coefficients in.
    ``coef_idx_map`` maps each ``(g, t)`` cell present in
    ``group_time_effects`` to its column index in ``X_red``. Storing reduced
    artifacts avoids the singular full-design bread that
    ``_compute_cr2_bm_contrast_dof`` would otherwise reject."""
    _df_one_way: Optional[float] = field(default=None, repr=False)
    """Residual DOF (``n - rank(X)``) for one-way ``vcov_type in
    {"classical","hc2"}`` paths (full-dummy, no survey). ``aggregate()``
    uses this to thread R's ``lm()`` t-distribution into per-key
    inference. ``None`` on hc1 / hc2_bm / surveyed paths (which use BM
    DOF or ``_df_survey`` instead)."""

    # ------------------------------------------------------------------ #
    #  Public methods                                                      #
    # ------------------------------------------------------------------ #
    def aggregate(self, type: str, weights: str = "cell") -> "WooldridgeDiDResults":  # noqa: A002
        """Compute and store one of the four jwdid_estat aggregation types.

        Parameters
        ----------
        type : "simple" | "group" | "calendar" | "event"
        weights : "cell" | "cohort_share", default "cell"
            Aggregation weighting scheme. ``"cell"`` (default) uses cell-
            count ``n_{g,t}`` observation counts and matches Stata
            ``jwdid_estat``. ``"cohort_share"`` uses paper W2025 Eq. 7.4
            ``ω̂_g = N_g / Σ_{g'} N_{g'} M_{g'}`` for ``type="simple"`` and
            Eq. 7.6 ``ω̂_{ge} = N_g / Σ_{g': g'+e ≤ T} N_{g'}`` for
            ``type="event"``. Both formulas reduce to ``N_g``-proportional
            per-cell weights with the appropriate normalization. The two
            schemes coincide on balanced panels with uniform within-cohort
            cell counts (paper Section 7.5). The cohort-share scheme is
            supported only for ``type="simple"`` and ``type="event"``; the
            paper provides no explicit cohort-share formula for ``"group"``
            or ``"calendar"`` aggregations and the library raises
            ``ValueError`` to preserve a fail-closed contract.

        Returns
        -------
        WooldridgeDiDResults
            ``self``, for chaining. The computed aggregation is written to
            ``overall_*`` (simple), ``group_effects``, ``calendar_effects``
            or ``event_study_effects``, and ``aggregation_weights[type]`` is
            updated once the write succeeds.

        Raises
        ------
        ValueError
            On an unknown ``type`` / ``weights``; on ``cohort_share`` with
            ``type in {"group", "calendar"}``, with no per-cohort unit
            counts, on a survey-weighted fit, or (``type="simple"``) on a
            bootstrapped fit. All of these are raised before any warning is
            emitted and before any state is mutated.

        Notes
        -----
        When ``vcov_type == "hc2_bm"``, aggregated inference (t_stat / p_value /
        conf_int) uses Bell-McCaffrey Satterthwaite contrast-specific DOFs
        rather than the survey/None default. The BM DOFs are computed lazily
        from ``_bm_artifacts`` via ``_compute_cr2_bm_contrast_dof`` and
        fail-closed (NaN inference) when the helper raises or returns NaN —
        per ``feedback_bm_contrast_dof_fail_closed``. The contrast column
        is rebuilt under the active ``weights`` scheme so the BM DOF
        reflects the actual weighting used by ATT + SE.
        """
        # ---------------- validation (no side effects yet) ---------------- #
        valid = ("simple", "group", "calendar", "event")
        if type not in valid:
            raise ValueError(f"type must be one of {valid}, got {type!r}")
        valid_weights = ("cell", "cohort_share")
        if weights not in valid_weights:
            raise ValueError(f"weights must be one of {valid_weights}, got {weights!r}")
        if weights == "cohort_share" and type in ("group", "calendar"):
            raise ValueError(
                f"weights='cohort_share' is only supported for type='simple' "
                f"(paper W2025 Eq. 7.4) and type='event' (paper W2025 Eq. 7.6). "
                f"type={type!r} has no explicit paper closed-form cohort-share "
                f"weighting; use weights='cell' (default) for "
                f"jwdid_estat-style cell-count weighting."
            )

        gt = self.group_time_effects
        cell_weights = self._gt_weights
        n_g_per_cohort = self._n_g_per_cohort
        vcov = self._gt_vcov
        keys_ordered = self._gt_keys if self._gt_keys else sorted(gt.keys())

        # Cohort-share scheme requires populated _n_g_per_cohort; raise an
        # informative error rather than silently returning zero-weighted
        # NaN aggregates.
        if weights == "cohort_share" and not n_g_per_cohort:
            raise ValueError(
                "weights='cohort_share' requires per-cohort unit counts "
                "(_n_g_per_cohort) populated at fit time; this Results "
                "object has none. Re-fit with the current WooldridgeDiD "
                "version, or use weights='cell' (default) on legacy fits."
            )

        # Survey + cohort_share composition is not yet supported. Codex R3
        # P0 fix: ``_n_g_per_cohort`` is populated as raw ``unit.nunique()``
        # counts, so composing design-weighted ATT estimates (survey TSL)
        # with unweighted cohort shares targets a mixed estimand that is
        # not paper W2025 Section 7's design-population cohort-share form.
        # Design-consistent cohort totals (survey-weighted unit totals per
        # cohort) require additional plumbing — fail-closed for now,
        # tracked in TODO follow-up.
        if weights == "cohort_share" and self.survey_metadata is not None:
            raise ValueError(
                "aggregate(weights='cohort_share') is not yet supported on "
                "survey-weighted fits (survey_design is not None): the "
                "cohort-share weights would compose design-weighted ATTs "
                "with unweighted cohort shares, targeting a mixed estimand "
                "that is not paper W2025 Section 7's design-population "
                "cohort-share form. Design-consistent cohort totals are "
                "deferred to a follow-up; use weights='cell' (default) "
                "on survey-weighted fits."
            )

        # Bootstrap interaction guard: when ``_bootstrap_used`` was set by
        # ``_fit_ols`` (the multiplier bootstrap overrode the analytical
        # ``overall_*`` fields), the default ``weights="cell"`` path is a
        # no-op (preserves bootstrap inference). The opt-in
        # ``weights="cohort_share"`` path is not bootstrapped — re-fit with
        # ``n_bootstrap=0`` to use the analytical cohort-share inference, or
        # wait for the deferred bootstrap-cohort-share follow-up (tracked in
        # TODO). This guard runs together with the other validation so a
        # rejected call does not first emit the cohort-share warning about
        # results it never produces.
        if type == "simple" and self._bootstrap_used:
            if weights == "cell":
                return self
            raise ValueError(
                "aggregate(type='simple', weights='cohort_share') is "
                "not supported on bootstrapped fits "
                "(n_bootstrap > 0): the multiplier bootstrap was run "
                "on the cell-count-weighted overall ATT at fit time, "
                "and the cohort-share aggregation has no matching "
                "bootstrap variant yet. Re-fit with n_bootstrap=0 to "
                "use cohort-share + analytical inference."
            )

        # Cohort-share variance conditional-on-shares disclaimer (paper
        # W2025 Section 7.5 / Eq. 7.4-7.6 discussion). The analytical SE
        # computed below treats the cohort-share weights ``ω̂_g`` /
        # ``ω̂_{ge}`` as fixed at their realized values, which means the
        # SE understates the unconditional sampling uncertainty from
        # estimating the shares themselves. Per `feedback_no_silent_failures`
        # and codex R2 P1 fix, fail-closed on the inference fields
        # (NaN out t-stat / p-value / conf-int) and emit a UserWarning
        # explaining the conditional-on-shares contract. The POINT
        # estimate ``att`` (paper Eq. 7.4 / 7.6 hand-calc form) and the
        # ``se`` (conditional-on-shares delta method) are still computed
        # and returned for reference, but the inferential machinery is
        # nulled out until proper APE/GMM-style aggregate inference is
        # derived (tracked in TODO).
        cohort_share_inference_fail_closed = weights == "cohort_share"
        if cohort_share_inference_fail_closed:
            warnings.warn(
                "weights='cohort_share' aggregation: the analytical SE and "
                "inference (t-stat / p-value / conf-int) computed by "
                "WooldridgeDiDResults.aggregate(..., weights='cohort_share') "
                "treat the cohort-share weights ω̂_g / ω̂_{ge} as fixed; "
                "this conditional-on-shares variance understates the "
                "unconditional sampling uncertainty per paper W2025 "
                "Section 7.5. The library fail-closes the t-stat / p-value "
                "/ conf-int fields to NaN until proper APE/GMM-style "
                "aggregate inference is derived (tracked in TODO). The "
                "POINT estimate and conditional-on-shares SE are computed "
                "and returned for reference; use weights='cell' (default) "
                "for the analytical aggregation with full inference.",
                UserWarning,
                stacklevel=2,
            )

        # ------------------------- local helpers -------------------------- #
        # Map each cell to its un-normalized weight under the active scheme.
        # The aggregation step normalizes by ``w_total`` per aggregation
        # key, so only relative magnitudes matter here. For the cohort_share
        # scheme, the per-cell weight is ``N_g`` (paper Eqs. 7.4, 7.6)
        # — the same per-cell value across simple-overall and event-time;
        # the per-key normalization differs because the cell sets differ
        # (event-time aggregations group cells with the same ``k = t - g``,
        # so the denominator picks up only cohorts present at event-time
        # ``k`` per paper Eq. 7.6).
        def _cell_weight(c: Tuple[Any, Any]) -> float:
            if weights == "cell":
                return float(cell_weights.get(c, 0))
            # cohort_share
            return float(n_g_per_cohort.get(c[0], 0))

        def _agg_se(w_vec: np.ndarray) -> float:
            """Delta-method SE for a linear combination w'β given full vcov."""
            if vcov is None or len(w_vec) != vcov.shape[0]:
                return float("nan")
            return float(np.sqrt(max(w_vec @ vcov @ w_vec, 0.0)))

        # Compute BM contrast DOFs lazily for hc2_bm. ``cells_by_key`` is an
        # ordered mapping of aggregation_key -> list of (g, t) cells; the
        # contrast for each key sums the per-cell one-hot vectors weighted
        # by the active scheme's normalized per-cell weight. Returns a dict
        # mapping aggregation_key -> df (or NaN on fail-closed). For
        # non-hc2_bm, returns an empty dict (caller falls back to
        # ``self._df_survey``). Rebuilds the contrast column under the
        # active ``weights`` scheme so the BM DOF matches the actual SE
        # computation.
        def _bm_contrast_dofs_for(
            cells_by_key: Dict[Any, List[Tuple[Any, Any]]],
        ) -> Dict[Any, float]:
            if self.vcov_type != "hc2_bm" or self._bm_artifacts is None:
                return {}
            # ``X_red`` / ``bread_red`` are the REDUCED kept-column artifacts
            # from ``_fit_ols`` (post rank-deficient drops). ``coef_idx_map``
            # maps (g, t) → column index in ``X_red``. See the
            # ``_bm_artifacts`` field docstring for the rationale.
            X_red, cluster_ids_full, bread_red, coef_idx_map = self._bm_artifacts
            n_red = X_red.shape[1]
            contrast_cols: List[np.ndarray] = []
            agg_keys: List[Any] = []
            for agg_key, cells in cells_by_key.items():
                if not cells:
                    continue
                w_total = sum(_cell_weight(c) for c in cells)
                # Same guard as ``_effects_by_key`` so DOFs and effects are
                # computed for exactly the same set of keys.
                if not w_total > 0:
                    continue
                col = np.zeros(n_red)
                contributed = False
                for c in cells:
                    if c not in coef_idx_map:
                        continue
                    col[coef_idx_map[c]] = _cell_weight(c) / w_total
                    contributed = True
                if not contributed:
                    continue
                contrast_cols.append(col)
                agg_keys.append(agg_key)
            if not contrast_cols:
                return {k: float("nan") for k in cells_by_key}

            from diff_diff.linalg import _compute_cr2_bm_contrast_dof

            contrasts_matrix = np.column_stack(contrast_cols)
            dof_map: Dict[Any, float] = {}
            try:
                dof_vec = _compute_cr2_bm_contrast_dof(
                    X_red, cluster_ids_full, bread_red, contrasts_matrix
                )
                for i, k in enumerate(agg_keys):
                    candidate = float(dof_vec[i])
                    dof_map[k] = candidate if np.isfinite(candidate) else float("nan")
            except (ValueError, np.linalg.LinAlgError) as exc:
                # stacklevel=3: this helper is called directly from
                # ``aggregate``, so the warning is attributed to the caller
                # of ``aggregate``.
                warnings.warn(
                    f"WooldridgeDiDResults.aggregate({type!r}) could not "
                    f"compute Bell-McCaffrey contrast DOF "
                    f"({exc.__class__.__name__}: {exc}). "
                    "Affected aggregated inference (t_stat / p_value / "
                    "conf_int) will be NaN to preserve the hc2_bm contract.",
                    UserWarning,
                    stacklevel=3,
                )
                for k in agg_keys:
                    dof_map[k] = float("nan")
            # Fill non-computed keys with NaN to fail-closed.
            for k in cells_by_key:
                dof_map.setdefault(k, float("nan"))
            return dof_map

        def _build_effect(
            att: float, se: float, df_for_inference: Optional[float]
        ) -> Dict[str, Any]:
            """Build an effect dict using ``df_for_inference`` for the t-distribution.

            When ``self.vcov_type == "hc2_bm"``, ``df_for_inference`` should be
            the BM contrast DOF (NaN → fail-closed). For ``classical`` /
            ``hc2`` (one-way, no survey) the residual DOF ``self._df_one_way``
            is used so per-key inference matches R ``lm()`` /
            ``coef_test()`` t-distribution. For hc1 / surveyed paths,
            ``self._df_survey`` (None → normal-theory) is used.

            Under ``weights="cohort_share"`` (variable
            ``cohort_share_inference_fail_closed=True``), the inference
            fields (t-stat / p-value / conf-int) are nulled to NaN
            because the analytical SE is conditional-on-shares and
            understates unconditional uncertainty per paper W2025
            Section 7.5. The point estimate and conditional-on-shares
            SE are still returned for reference.
            """
            nan_inference = {
                "att": att,
                "se": se,
                "t_stat": float("nan"),
                "p_value": float("nan"),
                "conf_int": (float("nan"), float("nan")),
            }
            if cohort_share_inference_fail_closed:
                return nan_inference
            if self.vcov_type == "hc2_bm":
                if df_for_inference is None or not np.isfinite(df_for_inference):
                    return nan_inference
                t_stat, p_value, conf_int = safe_inference(
                    att, se, alpha=self.alpha, df=df_for_inference
                )
            elif (
                self.vcov_type in ("classical", "hc2")
                and self._df_one_way is not None
                and np.isfinite(self._df_one_way)
            ):
                t_stat, p_value, conf_int = safe_inference(
                    att, se, alpha=self.alpha, df=self._df_one_way
                )
            else:
                t_stat, p_value, conf_int = safe_inference(
                    att, se, alpha=self.alpha, df=self._df_survey
                )
            return {
                "att": att,
                "se": se,
                "t_stat": t_stat,
                "p_value": p_value,
                "conf_int": conf_int,
            }

        def _effects_by_key(
            cells_by_key: Dict[Any, List[Tuple[Any, Any]]],
            dofs: Dict[Any, float],
        ) -> Dict[Any, Dict[str, Any]]:
            """Weighted ATT + delta-method SE + inference per aggregation key.

            Keys whose cell list is empty or whose total weight is not
            strictly positive are omitted from the result.
            """
            effects: Dict[Any, Dict[str, Any]] = {}
            for agg_key, cells in cells_by_key.items():
                if not cells:
                    continue
                w_total = sum(_cell_weight(c) for c in cells)
                if not w_total > 0:
                    continue
                att = sum(_cell_weight(c) * gt[c]["att"] for c in cells) / w_total
                # Set membership keeps the contrast build linear in the
                # number of cells (cells are hashable dict keys).
                members = set(cells)
                w_vec = np.array(
                    [_cell_weight(c) / w_total if c in members else 0.0 for c in keys_ordered]
                )
                effects[agg_key] = _build_effect(att, _agg_se(w_vec), dofs.get(agg_key))
            return effects

        # --------------------------- aggregation -------------------------- #
        if type == "simple":
            # Recompute overall ATT + SE under the active weighting scheme.
            # Under weights="cell" the result matches what fit() populated
            # at machine precision (re-derived from the same cell weights);
            # under weights="cohort_share" the overall ATT, SE, and BM
            # contrast DOF (under hc2_bm) are recomputed with cohort-share
            # per-cell weights per paper Eq. 7.4.
            cells_by_simple: Dict[Any, List[Tuple[Any, Any]]] = {
                "simple": [(g, t) for (g, t) in keys_ordered if g > 0 and t >= g]
            }
            dofs = _bm_contrast_dofs_for(cells_by_simple)
            eff = _effects_by_key(cells_by_simple, dofs).get("simple")
            if eff is not None:
                self.overall_att = eff["att"]
                self.overall_se = eff["se"]
                self.overall_t_stat = eff["t_stat"]
                self.overall_p_value = eff["p_value"]
                self.overall_conf_int = eff["conf_int"]
                # Atomic metadata mutation — only after successful write.
                self.aggregation_weights["simple"] = weights

        elif type == "group":
            cells_by_g: Dict[Any, List[Tuple[Any, Any]]] = {
                g: [(g2, t) for (g2, t) in keys_ordered if g2 == g and t >= g]
                for g in self.groups
            }
            dofs = _bm_contrast_dofs_for(cells_by_g)
            self.group_effects = _effects_by_key(cells_by_g, dofs)
            self.aggregation_weights["group"] = weights

        elif type == "calendar":
            cells_by_t: Dict[Any, List[Tuple[Any, Any]]] = {
                t: [(g, t2) for (g, t2) in keys_ordered if t2 == t and t >= g]
                for t in self.time_periods
            }
            dofs = _bm_contrast_dofs_for(cells_by_t)
            self.calendar_effects = _effects_by_key(cells_by_t, dofs)
            self.aggregation_weights["calendar"] = weights

        elif type == "event":
            # Paper W2025 Eq. 7.6 cohort-share-by-exposure weighting is
            # defined for post-treatment exposure times (k >= 0) only;
            # pre-treatment lead effects use a separate Eq. 7.7
            # construction with ``nw_it`` weights that the library does
            # not yet expose. Under ``weights="cohort_share"`` we
            # restrict event aggregation to ``k >= 0`` to avoid
            # silently applying Eq. 7.6 weights to negative-lead cells
            # (codex R4 P1 fix). Under ``weights="cell"`` the full event
            # range is preserved for backward compatibility (pre-period
            # leads serve as placebos under OLS + never_treated).
            if weights == "cohort_share":
                eligible_pairs = [(g, t) for (g, t) in keys_ordered if t - g >= 0]
            else:
                eligible_pairs = list(keys_ordered)
            all_k = sorted({t - g for (g, t) in eligible_pairs})
            cells_by_k: Dict[int, List[Tuple[Any, Any]]] = {
                k: [(g, t) for (g, t) in eligible_pairs if t - g == k] for k in all_k
            }
            dofs = _bm_contrast_dofs_for(cells_by_k)
            self.event_study_effects = _effects_by_key(cells_by_k, dofs)
            self.aggregation_weights["event"] = weights

        return self

    def summary(self, aggregation: str = "simple") -> str:
        """Return a formatted summary table as a single string.

        Parameters
        ----------
        aggregation : which aggregation to display ("simple", "group", "calendar", "event")

        Returns
        -------
        str
            Header block, optional survey / Conley variance blocks, and one
            row per effect. When the requested surface has not been
            populated (or ``aggregation`` is not one of the four names) a
            ``(call .aggregate(...) first)`` hint is rendered instead of rows.
        """
        lines = [
            "=" * 70,
            " Wooldridge Extended Two-Way Fixed Effects (ETWFE) Results",
            "=" * 70,
            f"Method: {self.method}",
            f"Control group: {self.control_group}",
            f"Observations: {self.n_obs}",
            f"Treated units: {self.n_treated_units}",
            f"Control units: {self.n_control_units}",
            f"Cohort trends: {self.cohort_trends}",
            f"Aggregation w: {self.aggregation_weights.get(aggregation, 'cell')}",
            "-" * 70,
        ]
        if self.survey_metadata is not None:
            from diff_diff.results import _format_survey_block

            lines.extend(_format_survey_block(self.survey_metadata, 70))
            lines.append("-" * 70)

        # Conley spatial-HAC variance label (rendered only on the conley path;
        # a full vcov-family label for all families is a separate follow-up).
        if self.vcov_type == "conley":
            from diff_diff.results import _format_vcov_label

            _vlabel = _format_vcov_label(
                self.vcov_type,
                cluster_name=self.cluster_name,
                n_clusters=self.n_clusters,
                n_obs=self.n_obs,
                conley_lag_cutoff=self.conley_lag_cutoff,
            )
            if _vlabel:
                lines.append(f"Std. errors: {_vlabel}")
                lines.append("-" * 70)

        def _fmt_row(label: str, att: float, se: float, t: float, p: float, ci: Tuple) -> str:
            # Imported lazily so summaries with no rows to render do not
            # need ``diff_diff.results``.
            from diff_diff.results import _get_significance_stars  # type: ignore

            stars = _get_significance_stars(p) if not np.isnan(p) else ""
            ci_lo = f"{ci[0]:.4f}" if not np.isnan(ci[0]) else "NaN"
            ci_hi = f"{ci[1]:.4f}" if not np.isnan(ci[1]) else "NaN"
            return (
                f"{label:<22} {att:>10.4f} {se:>10.4f} {t:>8.3f} "
                f"{p:>8.4f}{stars} [{ci_lo}, {ci_hi}]"
            )

        def _fmt_effect(label: str, eff: Dict[str, Any]) -> str:
            return _fmt_row(
                label, eff["att"], eff["se"], eff["t_stat"], eff["p_value"], eff["conf_int"]
            )

        ci_pct = f"{(1 - self.alpha) * 100:.0f}%"
        header = (
            f"{'Parameter':<22} {'Estimate':>10} {'Std. Err.':>10} "
            f"{'t-stat':>8} {'P>|t|':>8} [{ci_pct} CI]"
        )
        lines.append(header)
        lines.append("-" * 70)

        if aggregation == "simple":
            lines.append(
                _fmt_row(
                    "ATT (simple)",
                    self.overall_att,
                    self.overall_se,
                    self.overall_t_stat,
                    self.overall_p_value,
                    self.overall_conf_int,
                )
            )
        elif aggregation == "group" and self.group_effects:
            for g, eff in sorted(self.group_effects.items()):
                lines.append(_fmt_effect(f"ATT(g={g})", eff))
        elif aggregation == "calendar" and self.calendar_effects:
            for t, eff in sorted(self.calendar_effects.items()):
                lines.append(_fmt_effect(f"ATT(t={t})", eff))
        elif aggregation == "event" and self.event_study_effects:
            for k, eff in sorted(self.event_study_effects.items()):
                # Leads earlier than the anticipation window are placebo
                # pre-periods; leads inside it are anticipation periods.
                if k < -self.anticipation:
                    suffix = " [pre]"
                elif k < 0:
                    suffix = " [antic]"
                else:
                    suffix = ""
                lines.append(_fmt_effect(f"ATT(k={k})" + suffix, eff))
        else:
            lines.append(f" (call .aggregate({aggregation!r}) first)")
        lines.append("=" * 70)
        return "\n".join(lines)

    def to_dataframe(self, aggregation: str = "event") -> pd.DataFrame:
        """Export aggregated effects to a DataFrame.

        Parameters
        ----------
        aggregation : "simple" | "group" | "calendar" | "event" | "gt"
            Use "gt" to export raw group-time effects.

        Returns
        -------
        pandas.DataFrame
            One row per effect. Aggregated surfaces carry ``conf_int_lo`` /
            ``conf_int_hi`` plus ``cohort_trends`` and
            ``aggregation_weights`` columns; ``"gt"`` rows carry the raw
            per-cell effect dict. The frame is empty when the requested
            surface has not been populated by ``aggregate()`` or when
            ``aggregation`` is not a recognized name.
        """
        if aggregation == "gt":
            rows = []
            for (g, t), eff in sorted(self.group_time_effects.items()):
                row = {"cohort": g, "time": t, "relative_period": t - g}
                row.update(eff)
                rows.append(row)
            return pd.DataFrame(rows)

        # Active weighting scheme for the requested aggregation surface
        # (default "cell"; flips to "cohort_share" after an opt-in
        # cohort-share aggregation on that surface). Stamped on every
        # exported row so downstream consumers can tell which estimand
        # the row represents without having to inspect the originating
        # Results object.
        active_weights = self.aggregation_weights.get(aggregation, "cell")

        def _effect_rows(
            id_column: str, effects: Optional[Dict[Any, Dict]]
        ) -> List[Dict[str, Any]]:
            """Flatten a cached effects dict, splitting ``conf_int`` into lo/hi."""
            return [
                {
                    id_column: key,
                    **{name: value for name, value in eff.items() if name != "conf_int"},
                    "conf_int_lo": eff["conf_int"][0],
                    "conf_int_hi": eff["conf_int"][1],
                    "cohort_trends": self.cohort_trends,
                    "aggregation_weights": active_weights,
                }
                for key, eff in sorted((effects or {}).items())
            ]

        # Build only the requested surface rather than all four.
        if aggregation == "simple":
            rows = [
                {
                    "label": "ATT",
                    "att": self.overall_att,
                    "se": self.overall_se,
                    "t_stat": self.overall_t_stat,
                    "p_value": self.overall_p_value,
                    "conf_int_lo": self.overall_conf_int[0],
                    "conf_int_hi": self.overall_conf_int[1],
                    "cohort_trends": self.cohort_trends,
                    "aggregation_weights": active_weights,
                }
            ]
        elif aggregation == "group":
            rows = _effect_rows("cohort", self.group_effects)
        elif aggregation == "calendar":
            rows = _effect_rows("time", self.calendar_effects)
        elif aggregation == "event":
            rows = _effect_rows("relative_period", self.event_study_effects)
        else:
            # Unknown names keep returning an empty frame.
            rows = []
        return pd.DataFrame(rows)

    def plot_event_study(self, weights: str = "cell", **kwargs) -> None:
        """Event study plot. Always calls ``aggregate('event', weights=weights)``.

        Parameters
        ----------
        weights : "cell" | "cohort_share", default "cell"
            Aggregation weighting scheme threaded into the underlying
            ``aggregate("event", ...)`` call. ``"cohort_share"`` produces
            paper W2025 Eq. 7.6 cohort-share-by-exposure weights
            (post-treatment ``k >= 0`` only); inference fields are
            fail-closed to NaN per the Section 7.5 conditional-on-shares
            contract documented in REGISTRY, and the plot **suppresses
            error bars / CI bands** to honor the fail-closed contract
            (the conditional-on-shares SE would build a misleading
            normal-theory CI in the plotter).
        **kwargs
            Forwarded to ``diff_diff.visualization.plot_event_study``.

        Notes
        -----
        The wrapper unconditionally re-aggregates the event study under
        the requested ``weights`` scheme. This avoids the stale-cache
        hazard where a prior ``plot_event_study(weights="cohort_share")``
        call would leave the cached ``event_study_effects`` restricted
        to ``k >= 0`` (per the Eq. 7.6 scope), and a subsequent
        ``plot_event_study()`` (default ``weights="cell"``) call would
        silently reuse the cohort-share-keyed cache instead of restoring
        the full event range including pre-period placebo leads.
        """
        # Always re-aggregate under the requested weighting scheme. The
        # aggregate() method replaces ``event_study_effects`` in place
        # per the existing contract, so this is cheap and avoids
        # cohort_share→cell (or any cross-scheme) stale-cache bugs.
        self.aggregate("event", weights=weights)

        from diff_diff.visualization import plot_event_study  # type: ignore

        cached = self.event_study_effects or {}
        effects = {k: v["att"] for k, v in cached.items()}
        if weights == "cohort_share":
            # Honor the fail-closed inference contract per paper Section
            # 7.5: the conditional-on-shares SE understates unconditional
            # uncertainty, so passing the finite SE into the plotter
            # would let it render a normal-theory CI that contradicts
            # the NaN inference fields the aggregate() helper produces.
            # Pass NaN SEs so the plotter suppresses error bars / CI
            # bands. Locked by ``test_plot_event_study_cohort_share_suppresses_error_bars``.
            se = {k: float("nan") for k in cached}
        else:
            se = {k: v["se"] for k, v in cached.items()}
        plot_event_study(effects=effects, se=se, alpha=self.alpha, **kwargs)

    # --- Inference-field aliases (balance/external-adapter compatibility) ---
    @property
    def att(self) -> float:
        """Alias for ``overall_att``."""
        return self.overall_att

    @property
    def se(self) -> float:
        """Alias for ``overall_se``."""
        return self.overall_se

    @property
    def conf_int(self) -> Tuple[float, float]:
        """Alias for ``overall_conf_int``."""
        return self.overall_conf_int

    @property
    def p_value(self) -> float:
        """Alias for ``overall_p_value``."""
        return self.overall_p_value

    @property
    def t_stat(self) -> float:
        """Alias for ``overall_t_stat``."""
        return self.overall_t_stat

    def __repr__(self) -> str:
        """Compact one-line repr of the overall ATT and key fit metadata."""
        n_gt = len(self.group_time_effects)
        att_str = f"{self.overall_att:.4f}" if not np.isnan(self.overall_att) else "NaN"
        se_str = f"{self.overall_se:.4f}" if not np.isnan(self.overall_se) else "NaN"
        p_str = f"{self.overall_p_value:.4f}" if not np.isnan(self.overall_p_value) else "NaN"
        # Surface the active simple aggregation scheme (the one that
        # produced the printed ``overall_*`` values) + cohort_trends
        # flag so repr is self-describing for downstream consumers.
        simple_weights = self.aggregation_weights.get("simple", "cell")
        return (
            f"WooldridgeDiDResults("
            f"ATT={att_str}, SE={se_str}, p={p_str}, "
            f"n_gt={n_gt}, method={self.method!r}, "
            f"cohort_trends={self.cohort_trends}, "
            f"aggregation_weights={simple_weights!r})"
        )