Course: Math 535 - Mathematical Methods in Data Science (MMiDS)
Chapter: 2-Least squares: geometric, algebraic, and numerical aspects
Author: Sebastien Roch, Department of Mathematics, University of Wisconsin-Madison
Updated: July 15, 2024
Copyright: © 2024 Sebastien Roch
$\newcommand{\bmu}{\boldsymbol{\mu}}$ $\newcommand{\bSigma}{\boldsymbol{\Sigma}}$ $\newcommand{\bfbeta}{\boldsymbol{\beta}}$ $\newcommand{\bflambda}{\boldsymbol{\lambda}}$ $\newcommand{\bgamma}{\boldsymbol{\gamma}}$ $\newcommand{\bsigma}{{\boldsymbol{\sigma}}}$ $\newcommand{\bpi}{\boldsymbol{\pi}}$ $\newcommand{\btheta}{{\boldsymbol{\theta}}}$ $\newcommand{\bphi}{\boldsymbol{\phi}}$ $\newcommand{\balpha}{\boldsymbol{\alpha}}$ $\newcommand{\blambda}{\boldsymbol{\lambda}}$ $\renewcommand{\P}{\mathbb{P}}$ $\newcommand{\E}{\mathbb{E}}$ $\newcommand{\indep}{\perp\!\!\!\perp} \newcommand{\bx}{\mathbf{x}}$ $\newcommand{\bp}{\mathbf{p}}$ $\renewcommand{\bx}{\mathbf{x}}$ $\newcommand{\bX}{\mathbf{X}}$ $\newcommand{\by}{\mathbf{y}}$ $\newcommand{\bY}{\mathbf{Y}}$ $\newcommand{\bz}{\mathbf{z}}$ $\newcommand{\bZ}{\mathbf{Z}}$ $\newcommand{\bw}{\mathbf{w}}$ $\newcommand{\bW}{\mathbf{W}}$ $\newcommand{\bv}{\mathbf{v}}$ $\newcommand{\bV}{\mathbf{V}}$ $\newcommand{\bfg}{\mathbf{g}}$ $\newcommand{\bfh}{\mathbf{h}}$ $\newcommand{\horz}{\rule[.5ex]{2.5ex}{0.5pt}}$ $\renewcommand{\S}{\mathcal{S}}$ $\newcommand{\X}{\mathcal{X}}$ $\newcommand{\var}{\mathrm{Var}}$ $\newcommand{\pa}{\mathrm{pa}}$ $\newcommand{\Z}{\mathcal{Z}}$ $\newcommand{\bh}{\mathbf{h}}$ $\newcommand{\bb}{\mathbf{b}}$ $\newcommand{\bc}{\mathbf{c}}$ $\newcommand{\cE}{\mathcal{E}}$ $\newcommand{\cP}{\mathcal{P}}$ $\newcommand{\bbeta}{\boldsymbol{\beta}}$ $\newcommand{\bLambda}{\boldsymbol{\Lambda}}$ $\newcommand{\cov}{\mathrm{Cov}}$ $\newcommand{\bfk}{\mathbf{k}}$ $\newcommand{\idx}[1]{}$ $\newcommand{\xdi}{}$
Motivating example: predicting sales
The following dataset is from the excellent textbook [ISLP]. Quoting [ISLP, Section 2.1]:
Suppose that we are statistical consultants hired by a client to provide advice on how to improve sales of a particular product. The `advertising` data set consists of the `sales` of that product in 200 different markets, along with advertising budgets for the product in each of those markets for three different media: `TV`, `radio`, and `newspaper`. [...] It is not possible for our client to directly increase sales of the product. On the other hand, they can control the advertising expenditure in each of the three media. Therefore, if we determine that there is an association between advertising and sales, then we can instruct our client to adjust advertising budgets, thereby indirectly increasing sales. In other words, our goal is to develop an accurate model that can be used to predict sales on the basis of the three media budgets.
This is a regression problem. That is, we want to estimate the relationship between an outcome variable and one or more predictors (or features). We load the data.
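(Aside: the code in this chapter assumes a standard preamble along the following lines. The exact import cell is not shown in the source, so treat this as our reconstruction; in particular, `LA` refers to `numpy.linalg` and `mmids` to the helper module `mmids.py` used later in the chapter.)

# Assumed preamble (reconstruction; not part of the original notebook)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import linalg as LA
import mmids  # helper module mmids.py, available on the GitHub of the book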
data = pd.read_csv('advertising.csv')
data.head()
|   | TV | radio | newspaper | sales |
|---|---|---|---|---|
| 0 | 230.1 | 37.8 | 69.2 | 22.1 |
| 1 | 44.5 | 39.3 | 45.1 | 10.4 |
| 2 | 17.2 | 45.9 | 69.3 | 9.3 |
| 3 | 151.5 | 41.3 | 58.5 | 18.5 |
| 4 | 180.8 | 10.8 | 58.4 | 12.9 |
We will focus for now on the TV budget.
TV = data['TV'].to_numpy()
sales = data['sales'].to_numpy()
We make a scatterplot showing the relation between those two quantities.
plt.scatter(TV, sales, s=5, c='k')
plt.xlabel('TV'), plt.ylabel('sales')
plt.show()
There does seem to be a relationship between the two. Roughly, a higher TV budget is linked to higher sales, although the correspondence is not perfect. To express the relationship more quantitatively, we seek a function $f$ such that
$$ y \approx f(\mathbf{x}) $$where $\mathbf{x}$ denotes a sample TV budget and $y$ is the corresponding observed sales. We might posit for instance that there exists a true $f$ and that each observation is disrupted by some noise $\varepsilon$
$$ y = f(\mathbf{x}) + \varepsilon. $$A natural way to estimate such an $f$ from data is $k$-nearest-neighbors ($k$-NN) regression$\idx{k-NN regression}\xdi$. Let the data be of the form $\{(\mathbf{x}_i, y_i)\}_{i=1}^n$, where in our case $\mathbf{x}_i$ is the TV budget of the $i$-th sample and $y_i$ is the corresponding sales. For each $\mathbf{x}$ (not necessarily in the data), we do the following:
1. find the $k$ nearest $\mathbf{x}_i$'s to $\mathbf{x}$;
2. take an average of the corresponding $y_i$'s.
We implement this method in Python. We use the function `numpy.argsort` to sort an array and the function `numpy.absolute` to compute the absolute deviation. Our quick implementation here assumes that the $\mathbf{x}_i$'s are scalars.
def knnregression(x,y,k,xnew):
    n = len(x)
    # indices of the data points sorted by distance to xnew
    closest = np.argsort([np.absolute(x[i]-xnew) for i in range(n)])
    # average the y-values of the k nearest neighbors
    return np.mean(y[closest[0:k]])
For $k=3$ and a grid of $1000$ points, we get the following approximation $\hat{f}$. Here the function `numpy.linspace` creates an array of equally spaced points.
k = 3
xgrid = np.linspace(TV.min(), TV.max(), num=1000)
yhat = [knnregression(TV,sales,k,xnew) for xnew in xgrid]
plt.scatter(TV, sales, s=5, c='b', alpha=0.5)
plt.plot(xgrid, yhat, 'r')
plt.xlabel('TV'), plt.ylabel('sales')
plt.show()
One downside of $k$-NN regression is that it does not give an easily interpretable relationship: if I increase my TV budget by $\Delta$ dollars, how is it expected to affect the sales? Another issue arises in high dimension where the counter-intuitive phenomena we discussed in a previous section can have a significant impact. Recall in particular the High-dimensional Cube Theorem. If we have $d$ predictors -- where $d$ is large -- and our data is distributed uniformly in a bounded region, then any given $\mathbf{x}$ will be far from any of our data points. In that case, the $y$-values of the closest $\mathbf{x}_i$'s may not be predictive. This is referred to as the Curse of Dimensionality$\idx{curse of dimensionality}\xdi$.
One way out is to make stronger assumptions on the function $f$. For instance, we can assume that the true relationship is (approximately) affine, that is, $y \approx \beta_0 + \beta_1 x$, or if we have $d$ predictors
$$ y \approx \beta_0 + \sum_{j=1}^d \beta_j x_j. $$How do we estimate appropriate intercept and coefficients? The standard approach is to minimize the sum of the squared errors
$$ \sum_{i=1}^n \left(y_i - \left\{\beta_0 + \sum_{j=1}^d \beta_j (\mathbf{x}_{i})_j\right\}\right)^2, $$where $(\mathbf{x}_{i})_j$ is the $j$-th entry of input vector $\mathbf{x}_i$ and $y_i$ is the corresponding $y$-value. This is called multiple linear regression.
It is a least-squares problem. We re-write it in a more convenient matrix form and combine $\beta_0$ with the other $\beta_i$'s by adding a dummy predictor to each sample. Let
$$ \mathbf{y} = \begin{pmatrix} y_1 \\ y_2 \\ \vdots \\ y_n \end{pmatrix}, \quad\quad A = \begin{pmatrix} 1 & \mathbf{x}_1^T \\ 1 & \mathbf{x}_2^T \\ \vdots & \vdots \\ 1 & \mathbf{x}_n^T \end{pmatrix} \quad\text{and}\quad \boldsymbol{\beta} = \begin{pmatrix} \beta_0 \\ \beta_1 \\ \vdots \\ \beta_d \end{pmatrix}. $$Then observe that
\begin{align*} \|\mathbf{y} - A \boldsymbol{\beta}\|^2 &= \sum_{i=1}^n \left(y_i - (A \boldsymbol{\beta})_i\right)^2\\ &= \sum_{i=1}^n \left(y_i - \left\{\beta_0 + \sum_{j=1}^d \beta_j (\mathbf{x}_{i})_j\right\}\right)^2. \end{align*}The linear least-squares problem is then formulated as
$$ \min_{\boldsymbol{\beta}} \|\mathbf{y} - A \boldsymbol{\beta}\|^2. $$In words, we are looking for a linear combination of the columns of $A$ that is closest to $\mathbf{y}$ in Euclidean distance. Indeed, minimizing the squared Euclidean distance is equivalent to minimizing the Euclidean distance itself, since the square root is an increasing function.
One could solve this optimization problem through calculus (and we will come back to this approach later in the course), but understanding the geometric and algebraic structure of the problem turns out to provide powerful insights into its solution -- and that of many other problems in data science. It will also be an opportunity to review some basic linear-algebraic concepts along the way.
We will come back to the `advertising` dataset later in the chapter.
Background: review of vector spaces and matrix inverses
NUMERICAL CORNER: The plane $P$ made of all points $(x,y,z) \in \mathbb{R}^3$ that satisfy $z = x+y$ is a linear subspace. Indeed, $0 = 0 + 0$ so $(0,0,0) \in P$. And, for any $\mathbf{u}_1 = (x_1, y_1, z_1)$ and $\mathbf{u}_2 = (x_2, y_2, z_2)$ such that $z_1 = x_1 + y_1$ and $z_2 = x_2 + y_2$ and for any $\alpha \in \mathbb{R}$, we have
$$ \alpha z_1 + z_2 = \alpha (x_1 + y_1) + (x_2 + y_2) = (\alpha x_1 + x_2) + (\alpha y_1 + y_2). $$That is, $\alpha \mathbf{u}_1 + \mathbf{u}_2$ satisfies the condition defining $P$ and therefore is itself in $P$. Note also that $P$ passes through the origin.
In this example, the linear subspace $P$ can be described alternatively as the collection of all vectors of the form $(x, y, x+y)$.
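As a quick numerical sanity check (our addition, not in the original), we can verify the closure property on specific vectors:

# Illustrative check: alpha*u1 + u2 stays on the plane z = x + y
u1 = np.array([1., 2., 3.])   # satisfies 3 = 1 + 2
u2 = np.array([-1., 4., 3.])  # satisfies 3 = -1 + 4
alpha = 2.5
w = alpha * u1 + u2
print(w[2] - (w[0] + w[1]))   # 0.0 up to floating-point error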
We use `plot_surface` to plot it over a grid of points created using `numpy.meshgrid`.
x = np.linspace(0,1,num=101)
y = np.linspace(0,1,num=101)
X, Y = np.meshgrid(x, y)
print(X)
[[0.   0.01 0.02 ... 0.98 0.99 1.  ]
 [0.   0.01 0.02 ... 0.98 0.99 1.  ]
 [0.   0.01 0.02 ... 0.98 0.99 1.  ]
 ...
 [0.   0.01 0.02 ... 0.98 0.99 1.  ]
 [0.   0.01 0.02 ... 0.98 0.99 1.  ]
 [0.   0.01 0.02 ... 0.98 0.99 1.  ]]
print(Y)
[[0.   0.   0.   ... 0.   0.   0.  ]
 [0.01 0.01 0.01 ... 0.01 0.01 0.01]
 [0.02 0.02 0.02 ... 0.02 0.02 0.02]
 ...
 [0.98 0.98 0.98 ... 0.98 0.98 0.98]
 [0.99 0.99 0.99 ... 0.99 0.99 0.99]
 [1.   1.   1.   ... 1.   1.   1.  ]]
Z = X + Y
print(Z)
[[0.   0.01 0.02 ... 0.98 0.99 1.  ]
 [0.01 0.02 0.03 ... 0.99 1.   1.01]
 [0.02 0.03 0.04 ... 1.   1.01 1.02]
 ...
 [0.98 0.99 1.   ... 1.96 1.97 1.98]
 [0.99 1.   1.01 ... 1.97 1.98 1.99]
 [1.   1.01 1.02 ... 1.98 1.99 2.  ]]
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, cmap='viridis')
plt.show()
Geometry of least squares: the orthogonal projection
NUMERICAL CORNER: To solve a linear system in Numpy, use `numpy.linalg.solve`. As an example, we consider the overdetermined system $A \mathbf{x} = \mathbf{b}$ with

$$ A = \begin{pmatrix} 1 & 0 \\ 0 & 1 \\ 1 & 1 \end{pmatrix} \quad\text{and}\quad \mathbf{b} = \begin{pmatrix} 0 \\ 0 \\ 2 \end{pmatrix}, $$

and solve the associated normal equations $A^T A \mathbf{x} = A^T \mathbf{b}$. We use `numpy.ndarray.T` for the transpose.
w1 = np.array([1., 0., 1.])
w2 = np.array([0., 1., 1.])
A = np.stack((w1, w2),axis=-1)
b = np.array([0., 0., 2.])
x = LA.solve(A.T @ A, A.T @ b)
print(x)
[0.66666667 0.66666667]
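As a sanity check of the projection interpretation (our addition, not in the original), the residual $\mathbf{b} - A\mathbf{x}$ at the least-squares solution should be orthogonal to the columns of $A$:

# The least-squares residual is orthogonal to the column space of A,
# so A^T (b - A x) should be numerically zero.
print(A.T @ (b - A @ x))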
We can also use `numpy.linalg.lstsq` directly on the overdetermined system to compute the least-squares solution.
x = LA.lstsq(A, b, rcond=None)[0]
print(x)
[0.66666667 0.66666667]
QR decomposition and Householder transformations
We implement the Gram-Schmidt algorithm in Python. For reasons that will become clear in the next subsection, we output both the $\mathbf{q}_j$'s and $r_{ij}$'s, each in matrix form. Here we use `numpy.dot` to compute inner products.
def gramschmidt(A):
    (n,m) = A.shape
    Q = np.zeros((n,m))
    R = np.zeros((m,m))
    for j in range(m):
        v = np.copy(A[:,j])                  # start from the j-th column of A
        for i in range(j):
            R[i,j] = np.dot(Q[:,i], A[:,j])  # component of the column along q_i
            v -= R[i,j]*Q[:,i]               # remove that component
        R[j,j] = LA.norm(v)
        Q[:,j] = v/R[j,j]                    # normalize to obtain q_j
    return Q, R
NUMERICAL CORNER: Let's try a simple example.
w1 = np.array([1., 0., 1.])
w2 = np.array([0., 1., 1.])
A = np.stack((w1, w2),axis=-1)
print(A)
[[1. 0.] [0. 1.] [1. 1.]]
Q, R = gramschmidt(A)
print(Q)
[[ 0.70710678 -0.40824829] [ 0. 0.81649658] [ 0.70710678 0.40824829]]
print(R)
[[1.41421356 0.70710678] [0. 1.22474487]]
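As a quick check (our addition, not in the original), $QR$ should reconstruct $A$ and the columns of $Q$ should be orthonormal:

# Verify the factorization: QR should reproduce A and Q^T Q should be
# the identity, up to rounding error.
print(LA.norm(A - Q @ R))
print(LA.norm(Q.T @ Q - np.identity(2)))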
We implement back substitution in Python. In our naive implementation, we assume that the diagonal entries are not zero, which will suffice for our purposes.
def backsubs(R,b):
    m = b.shape[0]
    x = np.zeros(m)
    for i in reversed(range(m)):
        # solve for x[i] using the entries x[i+1], ..., x[m-1] computed so far
        x[i] = (b[i] - np.dot(R[i,i+1:m],x[i+1:m]))/R[i,i]
    return x
Forward substitution is implemented similarly.
def forwardsubs(L,b):
    m = b.shape[0]
    x = np.zeros(m)
    for i in range(m):
        # solve for x[i] using the entries x[0], ..., x[i-1] computed so far
        x[i] = (b[i] - np.dot(L[i,0:i],x[0:i]))/L[i,i]
    return x
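As a quick test of these solvers (our addition, not in the original), we can compare them to `numpy.linalg.solve` on a small triangular system:

# Sanity check of backsubs/forwardsubs against numpy's general solver
Rtest = np.array([[2., 1., 0.], [0., 3., 1.], [0., 0., 4.]])  # upper triangular
btest = np.array([1., 2., 3.])
print(backsubs(Rtest, btest))
print(LA.solve(Rtest, btest))
print(forwardsubs(Rtest.T, btest))   # Rtest.T is lower triangular
print(LA.solve(Rtest.T, btest))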
We implement the QR approach to least squares.
def ls_by_qr(A, b):
    Q, R = gramschmidt(A)        # A = QR with orthonormal columns in Q
    return backsubs(R, Q.T @ b)  # solve R x = Q^T b by back substitution
NUMERICAL CORNER: We return to our simple overdetermined system example.
w1 = np.array([1., 0., 1.])
w2 = np.array([0., 1., 1.])
A = np.stack((w1, w2),axis=-1)
b = np.array([0., 0., 2.])
x = ls_by_qr(A, b)
print(x)
[0.66666667 0.66666667]
We implement the procedure above in Python. We will need the following function. For $\alpha \in \mathbb{R}$, let the sign of $\alpha$ be
$$ \mathrm{sign}(\alpha) = \begin{cases} 1 & \text{if $\alpha > 0$}\\ 0 & \text{if $\alpha = 0$}\\ -1 & \text{if $\alpha < 0$} \end{cases} $$In Python, this is done using the function `numpy.sign`.
The following function constructs the upper triangular matrix $R$ by iteratively modifying the relevant block of $A$. On the other hand, computing the matrix $Q$ actually requires extra computational work that is often not needed. We saw that, in the context of the least-squares problem, we really only need to compute $Q^T \mathbf{b}$ for some input vector $\mathbf{b}$. This can be done at the same time that $R$ is constructed, as follows. The key point to note is that $Q^T \mathbf{b} = H_m \cdots H_1 \mathbf{b}$.
See the NumPy documentation for an explanation of `numpy.copy`.
def householder(A, b):
    n, m = A.shape
    R = np.copy(A)
    Qtb = np.copy(b)
    for k in range(m):
        # Householder reflection that zeroes out column k below the diagonal
        y = R[k:n,k]
        e1 = np.zeros(n-k)
        e1[0] = 1
        z = np.sign(y[0]) * LA.norm(y) * e1 + y
        z = z / LA.norm(z)
        # apply the reflection to the remaining block of R and to b
        R[k:n,k:m] = R[k:n,k:m] - 2 * np.outer(z, z) @ R[k:n,k:m]
        Qtb[k:n] = Qtb[k:n] - 2 * np.outer(z, z) @ Qtb[k:n]
    return R[0:m,0:m], Qtb[0:m]
In `householder`, we use both reflections defined above. We will not prove this here, but the particular choice made has good numerical properties. Quoting [TB, Lecture 10]:
Mathematically, either choice of sign is satisfactory. However, this is a case where numerical stability -- insensitivity to rounding errors -- dictates that one choice should be taken rather than the other. For numerical stability, it is desirable to reflect $\mathbf{x}$ to the vector $z \|\mathbf{x}\| \mathbf{e}_1$ that is not too close to $\mathbf{x}$ itself. [...] Suppose that [in the figure above] the angle between $H^+$ and the $\mathbf{e}_1$ axis is very small. Then the vector $\mathbf{v} = \|\mathbf{x}\| \mathbf{e}_1 - \mathbf{x}$ is much smaller than $\mathbf{x}$ or $\|\mathbf{x}\| \mathbf{e}_1$. Thus the calculation of $\mathbf{v}$ represents a subtraction of nearby quantities and will tend to suffer from cancellation errors.
NUMERICAL CORNER: We return to our overdetermined system example.
w1 = np.array([1., 0., 1.])
w2 = np.array([0., 1., 1.])
A = np.stack((w1, w2),axis=-1)
b = np.array([0., 0., 2.])
R, Qtb = householder(A, b)
x = backsubs(R, Qtb)
print(x)
[0.66666667 0.66666667]
One advantage of the Householder approach is that it produces a matrix $Q$ with very good orthogonality, i.e., $Q^T Q \approx I$. We give a quick example below comparing Gram-Schmidt and Householder. (The choice of matrix $A$ will become clearer when we discuss the singular value decomposition later in the chapter.)
seed = 535
rng = np.random.default_rng(seed)
n = 50
U, W = LA.qr(rng.normal(0,1,(n,n)))
V, W = LA.qr(rng.normal(0,1,(n,n)))
S = np.diag((1/2) ** np.arange(1,n+1))
A = U @ S @ V.T
Qgs, Rgs = gramschmidt(A)
print(LA.norm(A - Qgs @ Rgs))
print(LA.norm(Qgs.T @ Qgs - np.identity(n)))
1.4369568046009742e-16
19.745599060592102
As you can see above, the $Q$ and $R$ factors produced by the Gram-Schmidt algorithm do have the property that $QR \approx A$. However, $Q$ is far from orthogonal. (Recall that `LA.norm` computes the Frobenius norm introduced previously.)
On the other hand, Householder reflections perform much better in that respect as we show next. Here we use the implementation of Householder transformations in `numpy.linalg.qr`.
Qhh, Rhh = LA.qr(A)
print(LA.norm(A - Qhh @ Rhh))
print(LA.norm(Qhh.T @ Qhh - np.identity(n)))
4.739138228891714e-16
5.33506987519293e-15
Application: regression analysis
NUMERICAL CORNER: We test our least-squares method on simulated data. This has the advantage that we know the truth.
Suppose the truth is a linear function of one variable.
n, b0, b1 = 100, -1, 1
x = np.linspace(0,10,num=n)
y = b0 + b1*x
plt.scatter(x, y, s=3, c='k')
plt.show()
A perfect straight line is a little too easy. So let's add some noise. That is, to each $y_i$ we add an independent random variable $\varepsilon_i$ with a standard Normal distribution (mean $0$, variance $1$).
seed = 535
rng = np.random.default_rng(seed)
y += rng.normal(0,1,n)
plt.scatter(x, y, s=5, c='k')
plt.show()
We form the matrix $A$ and use our least-squares code to solve for $\boldsymbol{\hat\beta}$. The function `ls_by_qr`, which we implemented previously, is in mmids.py, which is available on the GitHub of the book.
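If `mmids.py` is not on your path, one option (a convenience on our part, not from the original) is to fall back on the local definition of `ls_by_qr` from earlier in the chapter:

# Fallback if the mmids module is unavailable: reuse the ls_by_qr function
# defined earlier in this chapter (convenience shim, not in the original).
try:
    import mmids
except ImportError:
    import types
    mmids = types.SimpleNamespace(ls_by_qr=ls_by_qr)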
A = np.stack((np.ones(n),x),axis=-1)
coeff = mmids.ls_by_qr(A,y)
print(coeff)
[-1.03381171 1.01808039]
plt.scatter(x, y, s=5, c='b', alpha=0.5)
plt.plot(x, coeff[0]+coeff[1]*x, 'r')
plt.show()
NUMERICAL CORNER: Suppose the truth is in fact a degree-two polynomial of one variable with Gaussian noise.
n, b0, b1, b2 = 100, 0, 0, 1
x = np.linspace(0,10,num=n)
y = b0 + b1 * x + b2 * x**2 + 10*rng.normal(0,1,n)
plt.scatter(x, y, s=5, c='k')
plt.show()
We form the matrix $A$ and use our least-squares code to solve for $\boldsymbol{\hat\beta}$.
A = np.stack((np.ones(n), x, x**2), axis=-1)
coeff = mmids.ls_by_qr(A,y)
print(coeff)
[-2.76266982 1.01627798 0.93554204]
plt.scatter(x, y, s=5, c='b', alpha=0.5)
plt.plot(x, coeff[0] + coeff[1] * x + coeff[2] * x**2, 'r')
plt.show()
NUMERICAL CORNER: We return to the `Advertising` dataset from the [ISLP] textbook. We load the dataset again.
data = pd.read_csv('advertising.csv')
We will focus for now on the TV budget. We form the matrix $A$ and use our least-squares code to solve for $\boldsymbol{\hat\beta}$.
TV = data['TV'].to_numpy()
sales = data['sales'].to_numpy()
n = np.size(TV)
A = np.stack((np.ones(n),TV),axis=-1)
coeff = mmids.ls_by_qr(A,sales)
print(coeff)
[7.03259355 0.04753664]
TVgrid = np.linspace(TV.min(), TV.max(), num=100)
plt.scatter(TV, sales, s=5, c='b', alpha=0.5)
plt.plot(TVgrid, coeff[0]+coeff[1]*TVgrid, 'r')
plt.show()
A degree-two polynomial might be a better fit.
A = np.stack((np.ones(n), TV, TV**2), axis=-1)
coeff = mmids.ls_by_qr(A,sales)
print(coeff)
[ 6.11412013e+00 6.72659270e-02 -6.84693373e-05]
plt.scatter(TV, sales, s=5, c='b', alpha=0.5)
plt.plot(TVgrid, coeff[0] + coeff[1] * TVgrid + coeff[2] * TVgrid**2, 'r')
plt.show()
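To make the visual comparison more quantitative (our addition, not in the original), we can compute the mean squared error of each fit on the data:

# Mean squared error of the linear and quadratic least-squares fits
# (illustrative addition; the variable names A1, A2 are ours).
A1 = np.stack((np.ones(n), TV), axis=-1)
coeff1 = mmids.ls_by_qr(A1, sales)
A2 = np.stack((np.ones(n), TV, TV**2), axis=-1)
coeff2 = mmids.ls_by_qr(A2, sales)
print(np.mean((sales - A1 @ coeff1)**2))   # linear fit
print(np.mean((sales - A2 @ coeff2)**2))   # quadratic fit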
The fit looks slightly better than the linear one. This is not entirely surprising though given that the linear model is a subset of the quadratic one. But, as we mentioned earlier, when adding more parameters we must now worry about overfitting the data. To illustrate, let's see what happens with a degree-$20$ polynomial fit.
deg = 20
A = np.stack([TV**i for i in range(deg+1)], axis=-1)
coeff = mmids.ls_by_qr(A,sales)
print(coeff)
[ 1.06538698e+00 6.72896471e-01 -1.53138969e-02 -2.74088516e-04 1.83651714e-05 -3.40080020e-07 3.17915742e-09 -1.64042005e-11 4.43633296e-14 -4.25654490e-17 -5.28727398e-20 1.11822932e-22 -3.47096893e-27 -2.44665112e-30 -2.79435976e-33 -4.05263859e-36 -6.83137511e-39 -1.27993830e-41 -2.59569760e-44 -5.59960687e-47 -1.26949578e-49]
saleshat = np.sum([coeff[i] * TVgrid**i for i in range(deg+1)], axis=0)
plt.scatter(TV, sales, s=5, c='b', alpha=0.5)
plt.plot(TVgrid, saleshat, 'r')
plt.show()
The outcome now seems to vary wildly, seemingly driven by the randomness of the data.
CHAT & LEARN: Ask your favorite AI chatbot about using cross-validation to choose a suitable degree. Ask for code and apply it to this dataset. $\ddagger$