Source code for enhancesa.bootstrap

# TODO: Add more test statistics, a whole table preferably.

# Dependencies
import numpy as np
import pandas as pd



def bootstrap(X, iters=1):
    """
    Estimate the population mean and standard error of a sample with the
    bootstrap resampling method.

    For a quick intro, go
    `here <https://sinablk.github.io/2019/02/13/resampling-methods.html>`_.

    Parameters
    ----------
    X : array-like or Series
        The observed sample. It is converted with ``np.asarray`` so that
        resampling is always positional, regardless of any pandas index
        labels the input may carry.
    iters : int, optional
        The number of resampling iterations. Usually a large value,
        e.g. 1000.

    Returns
    -------
    Series
        A ``float64`` Series with two entries: ``'Estimated mean'`` (the mean
        of the resample means) and ``'Estimated SE'``.

    Raises
    ------
    ValueError
        If ``X`` is empty.

    Notes
    -----
    ``iters`` resamples of size :math:`n` (the length of ``X``) are drawn
    with replacement. The reported standard error is the mean of the
    resample standard deviations divided by :math:`\\sqrt{n}`.

    Examples
    --------
    Exact values vary from run to run because the resampling is random.

    >>> x = np.random.normal(size=100)
    >>> enhancesa.bootstrap(x, iters=1000)
    Estimated mean:   -0.025309
    Estimated SE:      0.095531
    dtype: float64
    """
    # Work on a plain ndarray: indexing a Series with integers is label-based
    # and breaks (or picks wrong rows) when the index is not 0..n-1.
    data = np.asarray(X)

    # By convention, each resample has the same length as the sample.
    n = len(data)
    if n == 0:
        raise ValueError("X must contain at least one observation")

    # One row of n random positions per resampling iteration.
    idx = np.random.randint(0, n, (iters, n))

    # Fancy indexing gathers every resample at once: shape (iters, n, ...).
    samples = data[idx]

    # Reduce over everything except the iteration axis, so each resample
    # yields one mean and one (population, ddof=0) standard deviation.
    per_sample_axes = tuple(range(1, samples.ndim))
    means = samples.mean(axis=per_sample_axes)
    std_devs = samples.std(axis=per_sample_axes)

    total_mean = np.mean(means)
    se = np.mean(std_devs) / np.sqrt(n)

    # dtype is given explicitly; otherwise pandas would infer it.
    return pd.Series(
        {'Estimated mean': total_mean, 'Estimated SE': se},
        dtype='float64',
    )
# Add a confidence interval this way:
# >>> results = bootstrap(boston['medv'], iters=1000)
# >>> print('Confidence interval:',
# ...       results['Estimated mean'] - 2 * results['Estimated SE'],
# ...       results['Estimated mean'] + 2 * results['Estimated SE'])