#This code is issued under the Creative Commons CC0 Public Domain Dedication import numpy as np import matplotlib.pyplot as plt from scipy import stats def ecdf(x): x_sort = np.sort(x) y = np.arange(1, len(x_sort)+1)/float(len(x_sort)) return x_sort, y def DKW_bounds(y, n, alpha=0.05): # Compute Dvoretzky–Kiefer–Wolfowitz inequality eps = np.sqrt(0.5 * np.log(2.0/alpha) /n) lower = np.maximum(y - eps, 0) upper = np.minimum(y + eps, 1) return lower, upper def pointwise_bound(y, n, alpha=0.05): # Compute confidence intervals from an eCDF. # Clopper-Pearson interval lower = stats.beta.ppf(alpha/2, y*n, (1-y)*n + 1) upper = stats.beta.ppf(1-alpha/2, y*n + 1, (1-y)*n) # Primarily used for mapping nan to 0 or 1 lower = np.fmax(lower, 0) upper = np.fmin(upper, 1) return lower, upper num_samps = 30 x = np.linspace(-4,4, num=500) y = stats.norm.cdf(x) x_rand = np.random.randn(num_samps) x_ecdf, y_ecdf = ecdf(x_rand) # Ensure the eCDF extends to the edges of the graph for the bounds x_ecdf, y_ecdf = np.append([-4], x_ecdf), np.append([0], y_ecdf) x_ecdf, y_ecdf = np.append(x_ecdf, [4]), np.append(y_ecdf, [1]) # Pass in number of points because you extended the length of x_ecdf lower, upper = DKW_bounds(y_ecdf, n=num_samps) lower_pw, upper_pw = pointwise_bound(y_ecdf, n=num_samps) fig, axes = plt.subplots(figsize=(4,3.2)) axes.plot(x,y, '-g', linewidth=1.5, color='lightblue') #Plot gets too crowded if you show the actual ecdf #axes.step(x_ecdf, y_ecdf, 'k-', where='post', linewidth=1.5, color='lightblue') axes.step(x_ecdf, lower, '-b', where='post', linewidth=1.5, color='purple') axes.step(x_ecdf, upper, '-b', where='post', linewidth=1.5, color='purple') axes.step(x_ecdf, lower_pw, '-b', where='post', linewidth=1.5, color='orange') axes.step(x_ecdf, upper_pw, '-b', where='post', linewidth=1.5, color='orange') axes.set_xlim(-3,3) axes.grid() axes.set_ylabel('P(x)') axes.set_xlabel('x') fig.savefig('DKW_bounds.svg')