PyPortfolio · jrile018 · Jul 3, 2026 · Jul 3, 2026
diff --git a/docs/ExpectedReturns.rst b/docs/ExpectedReturns.rst
@@ -46,6 +46,14 @@ superior models and feed them into the optimizer.
         the mean historical return. However, if you plan on rebalancing much more frequently,
         there is a case to be made for lowering the span in order to capture recent trends.
 
+    .. autofunction:: james_stein_return
+
+        The James-Stein estimator shrinks each asset's sample mean return towards
+        the cross-sectional (grand) mean, reducing the estimation error that
+        mean-variance optimizers are so sensitive to. This is most valuable when
+        there are many assets relative to the length of the price history. The
+        shrinkage intensity can be set manually, or estimated from the data.
+
     .. autofunction:: capm_return
 
     .. autofunction:: returns_from_prices

diff --git a/pypfopt/expected_returns.py b/pypfopt/expected_returns.py
@@ -15,6 +15,7 @@
     - general return model function, allowing you to run any return model from one function.
     - mean historical return
     - exponentially weighted mean historical return
+    - James-Stein shrinkage estimate of returns
     - CAPM estimate of returns
 
 Additionally, we provide utility functions to convert from returns to prices and vice-versa.
@@ -104,6 +105,7 @@ def return_model(prices, method="mean_historical_return", **kwargs):
 
         - ``mean_historical_return``
         - ``ema_historical_return``
+        - ``james_stein_return``
         - ``capm_return``
 
     Raises
@@ -122,6 +124,8 @@ def return_model(prices, method="mean_historical_return", **kwargs):
         return ema_historical_return(prices, **kwargs)
     elif method == "capm_return":
         return capm_return(prices, **kwargs)
+    elif method == "james_stein_return":
+        return james_stein_return(prices, **kwargs)
     else:
         raise NotImplementedError("Return model {} not implemented".format(method))
 
@@ -223,6 +227,110 @@ def ema_historical_return(
         return returns.ewm(span=span).mean().iloc[-1] * frequency
 
 
+def james_stein_return(
+    prices,
+    returns_data=False,
+    shrinkage=None,
+    compounding=True,
+    frequency=252,
+    log_returns=False,
+):
+    """
+    Estimate annualised expected returns using James-Stein shrinkage.
+
+    The per-asset sample estimate of expected return is shrunk towards the
+    cross-sectional (grand) mean. This reduces estimation error in the mean
+    vector, which mean-variance optimisers are notoriously sensitive to, and
+    is especially helpful when the number of assets is large relative to the
+    length of the available price history.
+
+    Parameters
+    ----------
+    prices : pd.DataFrame
+        adjusted closing prices of the asset, each row is a date
+        and each column is a ticker/id.
+    returns_data : bool, optional
+        if true, the first argument is returns instead of prices.
+        These **should not** be log returns. Defaults to False.
+    shrinkage : float, optional
+        shrinkage intensity in [0, 1]. ``0`` returns the sample estimate
+        (identical to :func:`mean_historical_return`) and ``1`` returns the
+        grand mean for every asset. If ``None`` (default), the intensity is
+        estimated from the data via a James-Stein/SURE rule.
+    compounding : bool, optional
+        computes geometric mean returns if True,
+        arithmetic otherwise. Defaults to True.
+    frequency : int, optional
+        number of time periods in a year, defaults to 252 (the number
+        of trading days in a year)
+    log_returns : bool, optional
+        whether to compute using log returns. Defaults to False.
+
+    Returns
+    -------
+    pd.Series
+        annualised James-Stein shrinkage estimate of expected returns
+
+    Raises
+    ------
+    ValueError
+        if ``shrinkage`` is supplied and is not in [0, 1]
+
+    References
+    ----------
+    Stein, C. (1956). Inadmissibility of the usual estimator for the mean of a
+    multivariate normal distribution. *Proc. Third Berkeley Symp. on Math.
+    Statist. and Prob.*, Vol. 1, 197-206.
+    """
+    if not isinstance(prices, pd.DataFrame):
+        warnings.warn("prices are not in a dataframe", RuntimeWarning)
+        prices = pd.DataFrame(prices)
+
+    if shrinkage is not None and not 0 <= shrinkage <= 1:
+        raise ValueError("shrinkage must be in [0, 1], got {}".format(shrinkage))
+
+    if returns_data:
+        returns = prices
+    else:
+        returns = returns_from_prices(prices, log_returns)
+
+    _check_returns(returns)
+
+    # Per-asset annualised expected returns: identical to mean_historical_return,
+    # so that shrinkage=0 recovers the sample estimate exactly.
+    if compounding:
+        mu = (1 + returns).prod() ** (frequency / returns.count()) - 1
+    else:
+        mu = returns.mean() * frequency
+    grand_mean = mu.mean()
+
+    if shrinkage is None:
+        # Base the James-Stein dimensionality only on assets that carry usable
+        # information. All-NaN or single-observation columns have no meaningful
+        # mean/variance and would otherwise inflate ``p`` and bias the shrinkage
+        # intensity.
+        valid = (returns.count() >= 2) & np.isfinite(mu)
+        p = int(valid.sum())
+        dispersion = float(((mu[valid] - grand_mean) ** 2).sum())
+        if p <= 2 or dispersion <= 1e-12:
+            # Degenerate case: no meaningful cross-sectional dispersion to shrink,
+            # or too few assets for the James-Stein rule. Fall back to full
+            # shrinkage when all means coincide, otherwise none.
+            shrinkage = 1.0 if dispersion <= 1e-12 else 0.0
+        else:
+            # Variance of each annualised mean estimator (~ frequency**2 * var / n),
+            # averaged across the valid assets. For the arithmetic (non-compounding)
+            # mean this frequency**2 factor cancels against the annualised
+            # dispersion, making the intensity frequency-invariant; for the
+            # geometric/compounding mean the invariance holds only approximately.
+            tau_squared = (frequency**2 * returns.var(ddof=1) / returns.count())[
+                valid
+            ].mean()
+            shrinkage = float(np.clip((p - 2) * tau_squared / dispersion, 0.0, 1.0))
+
+    return shrinkage * grand_mean + (1 - shrinkage) * mu
+
+
 def capm_return(
     prices,
     market_prices=None,

diff --git a/tests/test_expected_returns.py b/tests/test_expected_returns.py
@@ -299,3 +299,97 @@ def test_log_return_passthrough():
         except AssertionError:
             return
         assert False
+
+
+def test_james_stein_return():
+    df = get_data()
+    js_return = expected_returns.james_stein_return(df)
+
+    assert isinstance(js_return, pd.Series)
+    assert list(js_return.index) == list(df.columns)
+    assert js_return.notnull().all()
+    assert js_return.dtype == "float64"
+
+
+def test_james_stein_return_shrinkage():
+    df = get_data()
+
+    # With shrinkage=0 we recover the sample estimate (mean_historical_return).
+    js_return_0 = expected_returns.james_stein_return(df, shrinkage=0.0)
+    sample_mean = expected_returns.mean_historical_return(df)
+    np.testing.assert_array_almost_equal(js_return_0.values, sample_mean.values)
+
+    # With shrinkage=1 every asset collapses to the grand mean.
+    js_return_1 = expected_returns.james_stein_return(df, shrinkage=1.0)
+    assert (js_return_1 == js_return_1.iloc[0]).all()
+
+    # A manual intermediate intensity still yields a valid Series.
+    js_return_05 = expected_returns.james_stein_return(df, shrinkage=0.5)
+    assert isinstance(js_return_05, pd.Series)
+
+
+def test_james_stein_return_auto_shrinkage():
+    df = get_data()
+    js_return_auto = expected_returns.james_stein_return(df, shrinkage=None)
+    assert isinstance(js_return_auto, pd.Series)
+    assert js_return_auto.notnull().all()
+
+
+def test_james_stein_return_invalid_shrinkage():
+    df = get_data()
+    with pytest.raises(ValueError):
+        expected_returns.james_stein_return(df, shrinkage=-0.1)
+    with pytest.raises(ValueError):
+        expected_returns.james_stein_return(df, shrinkage=1.5)
+
+
+def test_james_stein_return_compounding():
+    df = get_data()
+    js_compound = expected_returns.james_stein_return(df, compounding=True)
+    js_simple = expected_returns.james_stein_return(df, compounding=False)
+    assert isinstance(js_compound, pd.Series)
+    assert isinstance(js_simple, pd.Series)
+    assert js_compound.notnull().all()
+    assert js_simple.notnull().all()
+
+
+def test_james_stein_return_frequency():
+    df = get_data()
+    js_annual = expected_returns.james_stein_return(
+        df, compounding=False, frequency=252
+    )
+    js_monthly = expected_returns.james_stein_return(
+        df, compounding=False, frequency=12
+    )
+    np.testing.assert_array_almost_equal(js_annual / 252, js_monthly / 12)
+
+
+def test_james_stein_return_with_return_model():
+    df = get_data()
+    js_direct = expected_returns.james_stein_return(df, shrinkage=0.3)
+    js_dispatch = expected_returns.return_model(
+        df, method="james_stein_return", shrinkage=0.3
+    )
+    pd.testing.assert_series_equal(js_direct, js_dispatch)
+
+
+def test_james_stein_return_data_warning():
+    df = get_data()
+    with pytest.warns(RuntimeWarning):
+        js_return = expected_returns.james_stein_return(df.to_numpy())
+        assert isinstance(js_return, pd.Series)
+
+
+def test_james_stein_return_log_returns():
+    df = get_data()
+    js_normal = expected_returns.james_stein_return(df, log_returns=False)
+    js_log = expected_returns.james_stein_return(df, log_returns=True)
+    assert not (js_normal == js_log).all()
+
+
+def test_james_stein_return_returns_data():
+    df = get_data()
+    returns = expected_returns.returns_from_prices(df)
+    js_prices = expected_returns.james_stein_return(df)
+    js_returns = expected_returns.james_stein_return(returns, returns_data=True)
+    pd.testing.assert_series_equal(js_prices, js_returns)