Skip to content
89 changes: 85 additions & 4 deletions causalml/inference/meta/tlearner.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,38 @@ def __init__(

self.ate_alpha = ate_alpha
self.control_name = control_name
self.bootstrap_models_ = None

def __repr__(self):
return "{}(model_c={}, model_t={})".format(
self.__class__.__name__, self.model_c.__repr__(), self.model_t.__repr__()
)

@ignore_warnings(category=ConvergenceWarning)
def fit(self, X, treatment, y, p=None):
def fit(
self,
X,
treatment,
y,
p=None,
store_bootstraps=False,
n_bootstraps=1000,
bootstrap_size=10000,
random_state=None,
):
"""Fit the inference model

Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
treatment (np.array or pd.Series): a treatment vector
y (np.array or pd.Series): an outcome vector
p: unused, kept for API consistency
store_bootstraps (bool, optional): if True, trains a bootstrap ensemble
during fit and stores it in self.bootstrap_models_ for post-fit CI
estimation via predict(return_ci=True). Default: False.
n_bootstraps (int, optional): number of bootstrap iterations. Default: 1000.
bootstrap_size (int, optional): number of samples per bootstrap. Default: 10000.
random_state (int, optional): random seed for reproducible bootstrap sampling.
"""
X, treatment, y = convert_pd_to_np(X, treatment, y)
check_treatment_vector(treatment, self.control_name)
Expand All @@ -95,20 +113,64 @@ def fit(self, X, treatment, y, p=None):
self.models_c[group].fit(X_filt[w == 0], y_filt[w == 0])
self.models_t[group].fit(X_filt[w == 1], y_filt[w == 1])

if store_bootstraps:
rng = np.random.RandomState(random_state)
logger.info(
"Storing bootstrap ensemble ({} iterations)".format(n_bootstraps)
)
self.bootstrap_models_ = []
for i in tqdm(range(n_bootstraps)):
idxs = rng.choice(np.arange(X.shape[0]), size=bootstrap_size)
X_b, treatment_b, y_b = X[idxs], treatment[idxs], y[idxs]
models_c_b = {group: deepcopy(self.model_c) for group in self.t_groups}
models_t_b = {group: deepcopy(self.model_t) for group in self.t_groups}
for group in self.t_groups:
mask = (treatment_b == group) | (treatment_b == self.control_name)
treatment_filt = treatment_b[mask]
X_filt = X_b[mask]
y_filt = y_b[mask]
w = (treatment_filt == group).astype(int)
if w.sum() == 0 or (w == 0).sum() == 0:
models_c_b[group] = self.models_c[group]
models_t_b[group] = self.models_t[group]
continue
models_c_b[group].fit(X_filt[w == 0], y_filt[w == 0])
models_t_b[group].fit(X_filt[w == 1], y_filt[w == 1])
self.bootstrap_models_.append((models_c_b, models_t_b))
else:
self.bootstrap_models_ = None

def predict(
self, X, treatment=None, y=None, p=None, return_components=False, verbose=True
self,
X,
treatment=None,
y=None,
p=None,
return_components=False,
verbose=True,
return_ci=False,
):
"""Predict treatment effects.

Args:
X (np.matrix or np.array or pd.Dataframe): a feature matrix
treatment (np.array or pd.Series, optional): a treatment vector
y (np.array or pd.Series, optional): an outcome vector
return_components (bool, optional): whether to return outcome for treatment and control seperately
return_components (bool, optional): whether to return outcome for
treatment and control separately
verbose (bool, optional): whether to output progress logs
return_ci (bool, optional): whether to return confidence intervals
using the stored bootstrap ensemble. Requires fit() to have been
called with store_bootstraps=True. CI width is controlled by
self.ate_alpha set at init time.
Returns:
(numpy.ndarray): Predictions of treatment effects.
(numpy.ndarray): Predictions of treatment effects. If return_ci=True,
returns (te, te_lower, te_upper) each of shape [n_samples, n_treatment].
return_ci=True and return_components=True cannot be used together.
"""
if return_ci and return_components:
raise ValueError("return_ci and return_components cannot both be True.")

X, treatment, y = convert_pd_to_np(X, treatment, y)
yhat_cs = {}
yhat_ts = {}
Expand Down Expand Up @@ -136,6 +198,25 @@ def predict(
for i, group in enumerate(self.t_groups):
te[:, i] = yhat_ts[group] - yhat_cs[group]

if return_ci:
if self.bootstrap_models_ is None:
raise ValueError(
"No bootstrap ensemble found. Call fit(..., store_bootstraps=True) first."
)
te_bootstraps = np.zeros(
(X.shape[0], self.t_groups.shape[0], len(self.bootstrap_models_))
)
for b, (models_c_b, models_t_b) in enumerate(self.bootstrap_models_):
for i, group in enumerate(self.t_groups):
te_bootstraps[:, i, b] = models_t_b[group].predict(X) - models_c_b[
group
].predict(X)
te_lower = np.percentile(te_bootstraps, (self.ate_alpha / 2) * 100, axis=2)
te_upper = np.percentile(
te_bootstraps, (1 - self.ate_alpha / 2) * 100, axis=2
)
return te, te_lower, te_upper

if not return_components:
return te
else:
Expand Down
53 changes: 53 additions & 0 deletions tests/test_meta_learners.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pandas as pd
import pytest

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
Expand Down Expand Up @@ -1220,3 +1221,55 @@ def test_BaseDRClassifier(generate_classification_data):

te_separate = learner_separate.fit_predict(X=X, treatment=treatment, y=y)
assert te_separate.shape == te.shape


def test_BaseTLearner_predict_return_ci(generate_regression_data):
    """Exercise the stored-bootstrap CI path of BaseTRegressor.predict."""
    y, X, treatment, tau, b, e = generate_regression_data()

    bootstrap_kwargs = dict(
        store_bootstraps=True,
        n_bootstraps=50,
        bootstrap_size=500,
        random_state=RANDOM_SEED,
    )

    # Case 1: fit with store_bootstraps=True, then predict with return_ci=True.
    learner = BaseTRegressor(learner=LinearRegression(), control_name=0)
    learner.fit(X, treatment, y, **bootstrap_kwargs)
    tau_pred, lb, ub = learner.predict(X, return_ci=True)

    expected_shape = (X.shape[0], len(learner.t_groups))
    assert tau_pred.shape == expected_shape
    assert lb.shape == tau_pred.shape
    assert ub.shape == tau_pred.shape
    assert (lb <= ub).all()

    # Case 2: predicting CIs without a stored ensemble must raise.
    plain_learner = BaseTRegressor(learner=LinearRegression(), control_name=0)
    plain_learner.fit(X, treatment, y)
    with pytest.raises(ValueError):
        plain_learner.predict(X, return_ci=True)

    # Case 3: return_ci and return_components are mutually exclusive.
    with pytest.raises(ValueError):
        learner.predict(X, return_ci=True, return_components=True)

    # Case 4: the pre-existing API is untouched.
    tau_plain = learner.predict(X)
    assert tau_plain.shape == expected_shape

    # Case 5: a fixed random_state makes the bootstrap CIs reproducible.
    repeat_learner = BaseTRegressor(learner=LinearRegression(), control_name=0)
    repeat_learner.fit(X, treatment, y, **bootstrap_kwargs)
    tau2, lb2, ub2 = repeat_learner.predict(X, return_ci=True)
    np.testing.assert_array_equal(lb, lb2)
    np.testing.assert_array_equal(ub, ub2)