Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/ExpectedReturns.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ superior models and feed them into the optimizer.
the mean historical return. However, if you plan on rebalancing much more frequently,
there is a case to be made for lowering the span in order to capture recent trends.

.. autofunction:: james_stein_return

The James-Stein estimator shrinks each asset's sample mean return towards
the cross-sectional (grand) mean, reducing the estimation error that
mean-variance optimizers are so sensitive to. This is most valuable when
there are many assets relative to the length of the price history. The
shrinkage intensity can be set manually, or estimated from the data.

.. autofunction:: capm_return

.. autofunction:: returns_from_prices
Expand Down
108 changes: 108 additions & 0 deletions pypfopt/expected_returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
- general return model function, allowing you to run any return model from one function.
- mean historical return
- exponentially weighted mean historical return
- James-Stein shrinkage estimate of returns
- CAPM estimate of returns

Additionally, we provide utility functions to convert from returns to prices and vice-versa.
Expand Down Expand Up @@ -104,6 +105,7 @@ def return_model(prices, method="mean_historical_return", **kwargs):

- ``mean_historical_return``
- ``ema_historical_return``
- ``james_stein_return``
- ``capm_return``

Raises
Expand All @@ -122,6 +124,8 @@ def return_model(prices, method="mean_historical_return", **kwargs):
return ema_historical_return(prices, **kwargs)
elif method == "capm_return":
return capm_return(prices, **kwargs)
elif method == "james_stein_return":
return james_stein_return(prices, **kwargs)
else:
raise NotImplementedError("Return model {} not implemented".format(method))

Expand Down Expand Up @@ -223,6 +227,110 @@ def ema_historical_return(
return returns.ewm(span=span).mean().iloc[-1] * frequency


def james_stein_return(
prices,
returns_data=False,
shrinkage=None,
compounding=True,
frequency=252,
log_returns=False,
):
"""
Estimate annualised expected returns using James-Stein shrinkage.

The per-asset sample estimate of expected return is shrunk towards the
cross-sectional (grand) mean. This reduces estimation error in the mean
vector, which mean-variance optimisers are notoriously sensitive to, and
is especially helpful when the number of assets is large relative to the
length of the available price history.

Parameters
----------
prices : pd.DataFrame
adjusted closing prices of the asset, each row is a date
and each column is a ticker/id.
returns_data : bool, optional
if true, the first argument is returns instead of prices.
These **should not** be log returns. Defaults to False.
shrinkage : float, optional
shrinkage intensity in [0, 1]. ``0`` returns the sample estimate
(identical to :func:`mean_historical_return`) and ``1`` returns the
grand mean for every asset. If ``None`` (default), the intensity is
estimated from the data via a James-Stein/SURE rule.
compounding : bool, optional
computes geometric mean returns if True,
arithmetic otherwise. Defaults to True.
frequency : int, optional
number of time periods in a year, defaults to 252 (the number
of trading days in a year)
log_returns : bool, optional
whether to compute using log returns. Defaults to False.

Returns
-------
pd.Series
annualised James-Stein shrinkage estimate of expected returns

Raises
------
ValueError
if ``shrinkage`` is supplied and is not in [0, 1]

References
----------
Stein, C. (1956). Inadmissibility of the usual estimator for the mean of a
multivariate normal distribution. *Proc. Third Berkeley Symp. on Math.
Statist. and Prob.*, Vol. 1, 197-206.
"""
if not isinstance(prices, pd.DataFrame):
warnings.warn("prices are not in a dataframe", RuntimeWarning)
prices = pd.DataFrame(prices)

if shrinkage is not None and not 0 <= shrinkage <= 1:
raise ValueError("shrinkage must be in [0, 1], got {}".format(shrinkage))

if returns_data:
returns = prices
else:
returns = returns_from_prices(prices, log_returns)

_check_returns(returns)

# Per-asset annualised expected returns: identical to mean_historical_return,
# so that shrinkage=0 recovers the sample estimate exactly.
if compounding:
mu = (1 + returns).prod() ** (frequency / returns.count()) - 1
else:
mu = returns.mean() * frequency
grand_mean = mu.mean()

if shrinkage is None:
# Base the James-Stein dimensionality only on assets that carry usable
# information. All-NaN or single-observation columns have no meaningful
# mean/variance and would otherwise inflate ``p`` and bias the shrinkage
# intensity.
valid = (returns.count() >= 2) & np.isfinite(mu)
p = int(valid.sum())
dispersion = float(((mu[valid] - grand_mean) ** 2).sum())
if p <= 2 or dispersion <= 1e-12:
# Degenerate case: no meaningful cross-sectional dispersion to shrink,
# or too few assets for the James-Stein rule. Fall back to full
# shrinkage when all means coincide, otherwise none.
shrinkage = 1.0 if dispersion <= 1e-12 else 0.0
else:
# Variance of each annualised mean estimator (~ frequency**2 * var / n),
# averaged across the valid assets. For the arithmetic (non-compounding)
# mean this frequency**2 factor cancels against the annualised
# dispersion, making the intensity frequency-invariant; for the
# geometric/compounding mean the invariance holds only approximately.
tau_squared = (frequency**2 * returns.var(ddof=1) / returns.count())[
valid
].mean()
shrinkage = float(np.clip((p - 2) * tau_squared / dispersion, 0.0, 1.0))

return shrinkage * grand_mean + (1 - shrinkage) * mu


def capm_return(
prices,
market_prices=None,
Expand Down
94 changes: 94 additions & 0 deletions tests/test_expected_returns.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,3 +299,97 @@ def test_log_return_passthrough():
except AssertionError:
return
assert False


def test_james_stein_return():
df = get_data()
js_return = expected_returns.james_stein_return(df)

assert isinstance(js_return, pd.Series)
assert list(js_return.index) == list(df.columns)
assert js_return.notnull().all()
assert js_return.dtype == "float64"


def test_james_stein_return_shrinkage():
df = get_data()

# With shrinkage=0 we recover the sample estimate (mean_historical_return).
js_return_0 = expected_returns.james_stein_return(df, shrinkage=0.0)
sample_mean = expected_returns.mean_historical_return(df)
np.testing.assert_array_almost_equal(js_return_0.values, sample_mean.values)

# With shrinkage=1 every asset collapses to the grand mean.
js_return_1 = expected_returns.james_stein_return(df, shrinkage=1.0)
assert (js_return_1 == js_return_1.iloc[0]).all()

# A manual intermediate intensity still yields a valid Series.
js_return_05 = expected_returns.james_stein_return(df, shrinkage=0.5)
assert isinstance(js_return_05, pd.Series)


def test_james_stein_return_auto_shrinkage():
df = get_data()
js_return_auto = expected_returns.james_stein_return(df, shrinkage=None)
assert isinstance(js_return_auto, pd.Series)
assert js_return_auto.notnull().all()


def test_james_stein_return_invalid_shrinkage():
df = get_data()
with pytest.raises(ValueError):
expected_returns.james_stein_return(df, shrinkage=-0.1)
with pytest.raises(ValueError):
expected_returns.james_stein_return(df, shrinkage=1.5)


def test_james_stein_return_compounding():
df = get_data()
js_compound = expected_returns.james_stein_return(df, compounding=True)
js_simple = expected_returns.james_stein_return(df, compounding=False)
assert isinstance(js_compound, pd.Series)
assert isinstance(js_simple, pd.Series)
assert js_compound.notnull().all()
assert js_simple.notnull().all()


def test_james_stein_return_frequency():
df = get_data()
js_annual = expected_returns.james_stein_return(
df, compounding=False, frequency=252
)
js_monthly = expected_returns.james_stein_return(
df, compounding=False, frequency=12
)
np.testing.assert_array_almost_equal(js_annual / 252, js_monthly / 12)


def test_james_stein_return_with_return_model():
df = get_data()
js_direct = expected_returns.james_stein_return(df, shrinkage=0.3)
js_dispatch = expected_returns.return_model(
df, method="james_stein_return", shrinkage=0.3
)
pd.testing.assert_series_equal(js_direct, js_dispatch)


def test_james_stein_return_data_warning():
df = get_data()
with pytest.warns(RuntimeWarning):
js_return = expected_returns.james_stein_return(df.to_numpy())
assert isinstance(js_return, pd.Series)


def test_james_stein_return_log_returns():
df = get_data()
js_normal = expected_returns.james_stein_return(df, log_returns=False)
js_log = expected_returns.james_stein_return(df, log_returns=True)
assert not (js_normal == js_log).all()


def test_james_stein_return_returns_data():
df = get_data()
returns = expected_returns.returns_from_prices(df)
js_prices = expected_returns.james_stein_return(df)
js_returns = expected_returns.james_stein_return(returns, returns_data=True)
pd.testing.assert_series_equal(js_prices, js_returns)