import numpy as np import matplotlib.pyplot as plt import scipy.special as sp ## Sample size. n = 50 ## Predictor values. XV = np.random.uniform(low=-4, high=4, size=n) XV.sort() ## Design matrix. X = np.ones((n,2)) X[:,1] = XV ## True coefficients. beta = np.array([0, 1.], dtype=np.float64) ## True response values. EY = np.dot(X, beta) ## Observed response values. Y = EY + np.random.normal(size=n)*np.sqrt(20) ## Get the coefficient estimates. u,s,vt = np.linalg.svd(X,0) v = np.transpose(vt) bhat = np.dot(v, np.dot(np.transpose(u), Y)/s) ## The fitted values. Yhat = np.dot(X, bhat) ## The MSE and RMSE. MSE = ((Y-EY)**2).sum()/(n-X.shape[1]) s = np.sqrt(MSE) ## These multipliers are used in constructing the intervals. XtX = np.dot(np.transpose(X), X) V = [np.dot(X[i,:], np.linalg.solve(XtX, X[i,:])) for i in range(n)] V = np.array(V) ## The F quantile used in constructing the Scheffe interval. QF = sp.fdtri(X.shape[1], n-X.shape[1], 0.95) ## The lower and upper bounds of the Scheffe band. D = s*np.sqrt(X.shape[1]*QF*V) LB,UB = Yhat-D,Yhat+D ## The lower and upper bounds of the pointwise band. D = s*np.sqrt(2*V) LBP,UBP = Yhat-D,Yhat+D ## Make the plot. plt.clf() plt.plot(XV, Y, 'o', ms=3, color='grey') plt.hold(True) a = plt.plot(XV, EY, '-', color='black') b = plt.plot(XV, LB, '-', color='red') plt.plot(XV, UB, '-', color='red') c = plt.plot(XV, LBP, '-', color='blue') plt.plot(XV, UBP, '-', color='blue') d = plt.plot(XV, Yhat, '-', color='green') B = plt.legend( (a,d,b,c), ("Truth", "Estimate", "95% simultaneous CB",\ "95% pointwise CB"), 'lower left') B.draw_frame(False) plt.ylim([-20,15]) plt.gca().set_yticks([-20,-10,0,10,20]) plt.xlim([-4,4]) plt.gca().set_xticks([-4,-2,0,2,4]) plt.xlabel("X") plt.ylabel("Y") plt.savefig("regression_confidence_band.png") plt.savefig("regression_confidence_band.svg")