Course: Math 535 - Mathematical Methods in Data Science (MMiDS)
Chapter: 6-Optimization theory and algorithms
Author: Sebastien Roch, Department of Mathematics, University of Wisconsin-Madison
Updated: April 12, 2024
Copyright: © 2024 Sebastien Roch
Figure: Helpful map of ML by scikit-learn (Source)
$\bowtie$
We now turn to classification.
Quoting Wikipedia:
In machine learning and statistics, classification is the problem of identifying to which of a set of categories (sub-populations) a new observation belongs, on the basis of a training set of data containing observations (or instances) whose category membership is known. Examples are assigning a given email to the "spam" or "non-spam" class, and assigning a diagnosis to a given patient based on observed characteristics of the patient (sex, blood pressure, presence or absence of certain symptoms, etc.). Classification is an example of pattern recognition. In the terminology of machine learning, classification is considered an instance of supervised learning, i.e., learning where a training set of correctly identified observations is available.
We will illustrate this problem on the MNIST dataset. Quoting Wikipedia again:
The MNIST database (Modified National Institute of Standards and Technology database) is a large database of handwritten digits that is commonly used for training various image processing systems. The database is also widely used for training and testing in the field of machine learning. It was created by "re-mixing" the samples from NIST's original datasets. The creators felt that since NIST's training dataset was taken from American Census Bureau employees, while the testing dataset was taken from American high school students, it was not well-suited for machine learning experiments. Furthermore, the black and white images from NIST were normalized to fit into a 28x28 pixel bounding box and anti-aliased, which introduced grayscale levels. The MNIST database contains 60,000 training images and 10,000 testing images. Half of the training set and half of the test set were taken from NIST's training dataset, while the other half of the training set and the other half of the test set were taken from NIST's testing dataset.
Figure: MNIST sample images (Source)
$\bowtie$
We first load the data and convert it to an appropriate matrix representation. The data can be accessed with torchvision.datasets.MNIST.
import torch
from torchvision import datasets, transforms
# Download and load the MNIST dataset
mnist = datasets.MNIST(root='./data',
                       train=True,
                       download=True,
                       transform=transforms.ToTensor())
# Convert the dataset to a PyTorch DataLoader
train_loader = torch.utils.data.DataLoader(mnist,
                                           batch_size=len(mnist),
                                           shuffle=False)
# Extract images and labels from the DataLoader
imgs, labels = next(iter(train_loader))
imgs = imgs.squeeze().numpy()
labels = labels.numpy()
The squeeze() above removes the color dimension of the images, which are grayscale. The numpy() method converts the PyTorch tensors into NumPy arrays. See torch.utils.data.DataLoader for details on the data loading. We will say more about PyTorch later in this chapter.
For example, the first image and its label are:
plt.figure()
plt.imshow(imgs[0])
plt.show()
labels[0]
5
For now, we look at a subset of the samples: the 0's and 1's.
# Filter out images with labels 0 and 1
mask = (labels == 0) | (labels == 1)
imgs01 = imgs[mask]
labels01 = labels[mask]
In this new dataset, the first sample is:
plt.figure()
plt.imshow(imgs01[0])
plt.show()
labels01[0]
0
Next, we transform the images into vectors. For this we use the reshape() function, which changes the dimensions of an array without changing its data. Here the first dimension, which runs across the samples, remains of length len(imgs01). The -1 is understood to be whatever is needed to "fit" the remaining dimensions, here $28 \times 28 = 784$. In other words, we are effectively "flattening" each $28 \times 28$ image into a single vector of length $784$.
imgs01.shape
(12665, 28, 28)
X = imgs01.reshape(len(imgs01), -1)
X.shape
(12665, 784)
y = labels01
The input data is now of the form $\{(\mathbf{x}_i, y_i) : i=1,\ldots, n\}$ where $\mathbf{x}_i \in \mathbb{R}^d$ are the features and $y_i \in \{0,1\}$ is the label. Above we use the matrix representation $X \in \mathbb{R}^{n \times d}$ with rows $\mathbf{x}_i^T$, $i = 1,\ldots, n$ and $\mathbf{y} = (y_1, \ldots, y_n)^T \in \{0,1\}^n$.
Our goal:
learn a classifier from the examples $\{(\mathbf{x}_i, y_i) : i=1,\ldots, n\}$, that is, a function $\hat{f} : \mathbb{R}^d \to \mathbb{R}$ such that $\hat{f}(\mathbf{x}_i) \approx y_i$.
We may want to enforce that the output is in $\{0,1\}$ as well. This problem is referred to as binary classification.
A natural approach to this type of supervised learning problem is to define two objects:
Family of classifiers: A class $\widehat{\mathcal{F}}$ of classifiers from which to pick $\hat{f}$.
Loss function: A loss function $\ell(\hat{f}, (\mathbf{x},y))$ which quantifies how good of a fit $\hat{f}(\mathbf{x})$ is to $y$.
Our goal is then to solve
$$ \min_{\hat{f} \in \widehat{\mathcal{F}}} \frac{1}{n} \sum_{i=1}^n \ell(\hat{f}, (\mathbf{x}_i, y_i)), $$that is, we seek to find a classifier among $\widehat{\mathcal{F}}$ that minimizes the average loss over the examples.
For instance, in logistic regression, we consider linear classifiers of the form
$$ \hat{f}(\mathbf{x}) = \sigma(\mathbf{x}^T \boldsymbol{\theta}) \qquad \text{with} \qquad \sigma(t) = \frac{1}{1 + e^{-t}} $$where $\boldsymbol{\theta} \in \mathbb{R}^d$ is a parameter vector. And we use the cross-entropy loss
$$ \ell(\hat{f}, (\mathbf{x}, y)) = - y \log(\sigma(\mathbf{x}^T \boldsymbol{\theta})) - (1-y) \log(1- \sigma(\mathbf{x}^T \boldsymbol{\theta})). $$In parametric form, the problem boils down to
$$ \min_{\boldsymbol{\theta} \in \mathbb{R}^d} - \frac{1}{n} \sum_{i=1}^n y_i \log(\sigma(\mathbf{x}_i^T \boldsymbol{\theta})) - \frac{1}{n} \sum_{i=1}^n (1-y_i) \log(1- \sigma(\mathbf{x}_i^T \boldsymbol{\theta})). $$To obtain a prediction in $\{0,1\}$ here, we could cut off $\hat{f}(\mathbf{x})$ at a threshold $\tau \in [0,1]$, that is, return $\mathbf{1}\{\hat{f}(\mathbf{x}) > \tau\}$.
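To make the thresholding step concrete, here is a minimal sketch with a made-up parameter vector $\boldsymbol{\theta}$ and threshold $\tau = 1/2$ (the numbers are for illustration only, not fitted to any data):

```python
import numpy as np

def sigmoid(t):
    return 1 / (1 + np.exp(-t))

theta = np.array([1.0, -2.0])        # hypothetical parameters
X = np.array([[0.5, 0.1],            # three feature vectors x_i as rows
              [0.0, 1.0],
              [2.0, 0.2]])

probs = sigmoid(X @ theta)           # \hat{f}(x_i) = sigma(x_i^T theta)
preds = (probs > 0.5).astype(int)    # 1{ \hat{f}(x) > tau } with tau = 1/2
print(preds)  # [1 0 1]
```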
We will explain in a later chapter where this choice comes from.
The purpose of this chapter is to develop some of the mathematical theory and algorithms needed to solve this type of optimization formulation.
We illustrate the use of automatic differentiation to compute gradients.
Quoting Wikipedia:
In mathematics and computer algebra, automatic differentiation (AD), also called algorithmic differentiation or computational differentiation, is a set of techniques to numerically evaluate the derivative of a function specified by a computer program. AD exploits the fact that every computer program, no matter how complicated, executes a sequence of elementary arithmetic operations (addition, subtraction, multiplication, division, etc.) and elementary functions (exp, log, sin, cos, etc.). By applying the chain rule repeatedly to these operations, derivatives of arbitrary order can be computed automatically, accurately to working precision, and using at most a small constant factor more arithmetic operations than the original program. Automatic differentiation is distinct from symbolic differentiation and numerical differentiation (the method of finite differences). Symbolic differentiation can lead to inefficient code and faces the difficulty of converting a computer program into a single expression, while numerical differentiation can introduce round-off errors in the discretization process and cancellation.
We will use PyTorch. It uses tensors, which in many ways behave similarly to NumPy arrays. See here for a quick introduction. Here is an example. We first initialize the tensors at the values where the derivatives will be computed; here each corresponds to a single real variable. With the option requires_grad=True, we indicate that these are variables with respect to which a gradient will be taken later.
# Initialize variables
x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)
The function .backward() computes the gradient using backpropagation, to which we will return later. The partial derivatives are accessed with .grad.
# Perform automatic differentiation
f = 3 * x**2 + torch.exp(x) + y
f.backward() # Compute gradients
# Print gradients
print(x.grad) # df/dx
print(y.grad) # df/dy
tensor(8.7183)
tensor(1.)
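As a sanity check on the printed values: the partials computed by hand are $\partial f/\partial x = 6x + e^x$ and $\partial f/\partial y = 1$, and a centered finite difference at $(x, y) = (1, 2)$ agrees with the autograd output. A minimal sketch in plain Python:

```python
import math

# Same function as above, as a plain Python function
def f(x, y):
    return 3 * x**2 + math.exp(x) + y

# Centered finite differences at (1, 2)
h = 1e-5
dfdx = (f(1.0 + h, 2.0) - f(1.0 - h, 2.0)) / (2 * h)
dfdy = (f(1.0, 2.0 + h) - f(1.0, 2.0 - h)) / (2 * h)

print(round(dfdx, 4))  # 8.7183, i.e., 6 + e
print(round(dfdy, 4))  # 1.0
```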
The input parameters can also be vectors, which allows us to consider functions of a large number of variables.
# New variables for the second example
z = torch.tensor([1., 2., 3.], requires_grad=True)
# Perform automatic differentiation
g = torch.sum(z**2)
g.backward() # Compute gradients
# Print gradient
print(z.grad) # gradient is (2 z_1, 2 z_2, 2 z_3)
tensor([2., 4., 6.])
Here is another typical example in a data science context.
# Variables for the third example
X = torch.randn(3, 2) # Random dataset (features)
y = torch.tensor([[1.], [0.], [1.]]) # Dataset (labels); column shape (3, 1) to match the predictions below
theta = torch.ones(2, 1, requires_grad=True) # Parameter assignment
# Perform automatic differentiation
predict = X @ theta # Classifier with parameter vector theta
loss = torch.sum((predict - y)**2) # Loss function
loss.backward() # Compute gradients
# Print gradient
print(theta.grad) # gradient of loss
tensor([[ 2.1983],
        [40.7574]])
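For a sum-of-squares loss like this one, the gradient has the closed form $2 X^T(X\boldsymbol{\theta} - \mathbf{y})$ (with $\mathbf{y}$ a column vector). A small NumPy check with made-up numbers, independent of the random data above:

```python
import numpy as np

X = np.array([[1.0, 2.0],
              [0.0, 1.0],
              [1.0, -1.0]])             # 3 samples, 2 features
y = np.array([[1.0], [0.0], [1.0]])
theta = np.ones((2, 1))

grad_closed = 2 * X.T @ (X @ theta - y)  # closed-form gradient of the loss

# Finite-difference check of the first coordinate
def loss(th):
    return float(np.sum((X @ th - y)**2))

h = 1e-6
th_p = theta.copy(); th_p[0, 0] += h
th_m = theta.copy(); th_m[0, 0] -= h
fd = (loss(th_p) - loss(th_m)) / (2 * h)

print(grad_closed.ravel())  # [ 2. 12.]
print(round(fd, 6))         # 2.0
```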
$\newcommand{\bSigma}{\boldsymbol{\Sigma}}$ $\newcommand{\bmu}{\boldsymbol{\mu}}$ $\newcommand{\blambda}{\boldsymbol{\lambda}}$
EXAMPLE: Consider $f(x) = e^x$. Then $f'(x) = f''(x) = e^x$. Suppose we are interested in approximating $f$ in the interval $[0,1]$. We take $a=0$ and $b=1$ in Taylor's Theorem. The linear term is
$$ f(a) + (x-a) f'(a) = 1 + x e^0 = 1 + x. $$Then for any $x \in [0,1]$
$$ f(x) = 1 + x + \frac{1}{2}x^2 e^{\xi_x} $$where $\xi_x \in (0,1)$ depends on $x$. We get a uniform bound on the error over $[0,1]$ by replacing $\xi_x$ with its worst possible value over $[0,1]$
$$ |f(x) - (1+x)| \leq \frac{1}{2}x^2 e^{\xi_x} \leq \frac{e}{2} x^2. $$
x = np.linspace(0,1,100)
y = np.exp(x)
taylor = 1 + x
err = (np.exp(1)/2) * x**2
plt.plot(x,y,label='f')
plt.plot(x,taylor,label='taylor')
plt.legend()
plt.show()
If we plot the upper and lower bounds, we see that $f$ indeed falls within them.
plt.plot(x,y,label='f')
plt.plot(x,taylor,label='taylor')
plt.plot(x,taylor-err,linestyle=':',color='green',label='lower')
plt.plot(x,taylor+err,linestyle='--',color='green',label='upper')
plt.legend()
plt.show()
EXAMPLE: Let $f(x) = x^3$. Then $f'(x) = 3 x^2$ and $f''(x) = 6 x$ so that $f'(0) = 0$ and $f''(0) \geq 0$. Hence $x=0$ is a stationary point. But $x=0$ is not a local minimizer. Indeed $f(0) = 0$ but, for any $\delta > 0$, $f(-\delta) < 0$.
x = np.linspace(-2,2,100)
y = x**3
plt.plot(x,y)
plt.ylim(-5,5)
plt.show()
EXAMPLE: If we want to minimize $2 x_1^2 + 3 x_2^2$ over all two-dimensional unit vectors $\mathbf{x} = (x_1, x_2)$, then we can let
$$ f(\mathbf{x}) = 2 x_1^2 + 3 x_2^2 $$and
$$ h_1(\mathbf{x}) = 1 - x_1^2 - x_2^2 = 1 - \|\mathbf{x}\|^2. $$Observe that we could have chosen a different equality constraint to express the same minimization problem. $\lhd$
EXAMPLE: (continued) Returning to the previous example,
$$ \nabla f(\mathbf{x}) = \left( \frac{\partial f(\mathbf{x})}{\partial x_1}, \frac{\partial f(\mathbf{x})}{\partial x_2} \right) = (4 x_1, 6 x_2) $$and
$$ \nabla h_1(\mathbf{x}) = \left( \frac{\partial h_1(\mathbf{x})}{\partial x_1}, \frac{\partial h_1(\mathbf{x})}{\partial x_2} \right) = (- 2 x_1, - 2 x_2). $$The conditions in the theorem read
\begin{align*} &4 x_1 - 2 \lambda_1 x_1 = 0\\ &6 x_2 - 2 \lambda_1 x_2 = 0. \end{align*}The constraint $x_1^2 + x_2^2 = 1$ must also be satisfied. Observe that the linear independence condition is automatically satisfied since there is only one constraint.
There are several cases to consider.
1- If neither $x_1$ nor $x_2$ is $0$, then the first equation gives $\lambda_1 = 2$ while the second one gives $\lambda_1 = 3$. So that case cannot happen.
2- If $x_1 = 0$, then $x_2 = 1$ or $x_2 = -1$ by the constraint and the second equation gives $\lambda_1 = 3$ in either case.
3- If $x_2 = 0$, then $x_1 = 1$ or $x_1 = -1$ by the constraint and the first equation gives $\lambda_1 = 2$ in either case.
Does any of these last four solutions, i.e., $(x_1,x_2,\lambda_1) = (0,1,3)$, $(x_1,x_2,\lambda_1) = (0,-1,3)$, $(x_1,x_2,\lambda_1) = (1,0,2)$ and $(x_1,x_2,\lambda_1) = (-1,0,2)$, actually correspond to a local minimizer?
This problem can be solved manually. Indeed, replace $x_2^2 = 1 - x_1^2$ into the objective function to obtain
$$ 2 x_1^2 + 3(1 - x_1^2) = -x_1^2 + 3. $$This is minimized for the largest value that $x_1^2$ can take, namely when $x_1 = 1$ or $x_1 = -1$. Indeed, we must have $0 \leq x_1^2 \leq x_1^2 + x_2^2 = 1$. So both $(x_1, x_2) = (1,0)$ and $(x_1, x_2) = (-1,0)$ are global minimizers. A fortiori, they must be local minimizers.
What about $(x_1,x_2) = (0,1)$ and $(x_1,x_2) = (0,-1)$? Arguing as above, they in fact correspond to global maximizers of the objective function. $\lhd$
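The case analysis above can be double-checked numerically by evaluating the objective at the four candidate points (a quick sketch, with the objective hard-coded):

```python
# Objective from the example
def f(x1, x2):
    return 2 * x1**2 + 3 * x2**2

# The four candidate points found via the Lagrangian conditions
candidates = [(0, 1), (0, -1), (1, 0), (-1, 0)]
values = {pt: f(*pt) for pt in candidates}
print(values)  # {(0, 1): 3, (0, -1): 3, (1, 0): 2, (-1, 0): 2}
```

The minimum value $2$ is attained at $(\pm 1, 0)$, matching the manual solution.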
EXAMPLE: (continued) Returning to the previous example, the points satisfying $h_1(\mathbf{x}) = 0$ sit on the circle of radius $1$ around the origin. We have already seen that
$$ \nabla h_1(\mathbf{x}) = \left( \frac{\partial h_1(\mathbf{x})}{\partial x_1}, \frac{\partial h_1(\mathbf{x})}{\partial x_2} \right) = (- 2 x_1, - 2 x_2). $$
Here is code plotting these (courtesy of ChatGPT 4). It uses numpy.meshgrid to generate a grid of points for $x_1$ and $x_2$, and matplotlib.pyplot.contour to plot the constraint set as a contour line (for the constant value $0$) of $h_1$. The gradients are plotted with the matplotlib.pyplot.quiver function, which is used for plotting vectors as arrows.
# Define the constraint function
def h1(x1, x2):
    return 1 - x1**2 - x2**2
# Generate a grid of points for x1 and x2
x1 = np.linspace(-1.5, 1.5, 400)
x2 = np.linspace(-1.5, 1.5, 400)
X1, X2 = np.meshgrid(x1, x2)
# Compute constraint function on grid
H1 = h1(X1, X2)
# Points on the constraint where the gradients will be plotted
points = [
    (0.5, np.sqrt(3)/2),
    (-0.5, np.sqrt(3)/2),
    (0.5, -np.sqrt(3)/2),
    (-0.5, -np.sqrt(3)/2),
    (1, 0),
    (-1, 0),
    (0, 1),
    (0, -1)
]
plt.figure(figsize=(8, 6))
plt.grid(True)
plt.axis('equal')
# Plot the constraint set where h1(x1, x2) = 0
plt.contour(X1, X2, H1, levels=[0], colors='blue')
# Plot gradients of h1 (red) at specified points
for x1, x2 in points:
    plt.quiver(x1, x2, -2*x1, -2*x2, scale=10, color='red')
plt.show()
plt.figure(figsize=(8, 6))
plt.grid(True)
plt.axis('equal')
plt.contour(X1, X2, H1, levels=[0], colors='blue')
# Plot normalized gradients of h1 (red) and of f (green) at specified points
for x1, x2 in points:
    plt.quiver(x1, x2, -x1/np.sqrt(x1**2 + x2**2),
               -x2/np.sqrt(x1**2 + x2**2),
               scale=10, color='red')
    plt.quiver(x1, x2, 4*x1/np.sqrt(16 * x1**2 + 36 * x2**2),
               6*x2/np.sqrt(16 * x1**2 + 36 * x2**2),
               scale=10, color='green')
plt.show()
$\newcommand{\bSigma}{\boldsymbol{\Sigma}}$ $\newcommand{\bmu}{\boldsymbol{\mu}}$ $\newcommand{\bsigma}{\boldsymbol{\sigma}}$
NUMERICAL CORNER: We implement gradient descent in Python. We assume that a function f and its gradient grad_f are provided. We first code the basic steepest descent step with step size $\alpha =$ alpha.
def desc_update(grad_f, x, alpha):
    return x - alpha*grad_f(x)

def gd(f, grad_f, x0, alpha=1e-3, niters=int(1e6)):
    xk = x0
    for _ in range(niters):
        xk = desc_update(grad_f, xk, alpha)
    return xk, f(xk)
We illustrate on a simple example.
def f(x):
    return (x-1)**2 + 10
xgrid = np.linspace(-5,5,100)
plt.plot(xgrid, f(xgrid))
plt.show()
def grad_f(x):
    return 2*(x-1)
gd(f, grad_f, 0)
(0.9999999999999722, 10.0)
We found a global minimizer in this case.
The next example shows that a different local minimizer may be reached depending on the starting point.
def f(x):
    return 4 * (x-1)**2 * (x+1)**2 - 2*(x-1)
xgrid = np.linspace(-2,2,100)
plt.plot(xgrid, f(xgrid), label='f')
plt.ylim((-1,10))
plt.legend()
plt.show()
CLICK ON TARGET: If we start gradient descent from $-2$, where will it converge? $\ddagger$
def grad_f(x):
    return 8 * (x-1) * (x+1)**2 + 8 * (x-1)**2 * (x+1) - 2
xgrid = np.linspace(-2,2,100)
plt.plot(xgrid, f(xgrid), label='f')
plt.plot(xgrid, grad_f(xgrid), label='grad_f')
plt.ylim((-10,10))
plt.legend()
plt.show()
gd(f, grad_f, 0)
(1.057453770738375, -0.0590145651028224)
gd(f, grad_f, -2)
(-0.9304029265558538, 3.933005966859003)
In the final example, we end up at a stationary point that is not a local minimizer. Here both the first and second derivatives are zero. This is known as a saddle point.
def f(x):
    return x**3
xgrid = np.linspace(-2,2,100)
plt.plot(xgrid, f(xgrid), label='f')
plt.ylim((-10,10))
plt.legend()
plt.show()
def grad_f(x):
    return 3 * x**2
xgrid = np.linspace(-2,2,100)
plt.plot(xgrid, f(xgrid), label='f')
plt.plot(xgrid, grad_f(xgrid), label='grad_f')
plt.ylim((-10,10))
plt.legend()
plt.show()
gd(f, grad_f, 2)
(0.00033327488712690107, 3.701755838398568e-11)
gd(f, grad_f, -2, niters=100)
(-4.93350410883896, -120.0788396909241)
NUMERICAL CORNER: We revisit our first simple single-variable example.
def f(x):
    return (x-1)**2 + 10
xgrid = np.linspace(-5,5,100)
plt.plot(xgrid, f(xgrid))
plt.show()
Recall that the first derivative is:
def grad_f(x):
    return 2*(x-1)
So the second derivative is $f''(x) = 2$. Hence, this $f$ is $L$-smooth and $m$-strongly convex with $L = m = 2$. The theory we developed suggests taking step size $\alpha_t = \alpha = 1/L = 1/2$. It also implies that
$$ f(x^1) - f(x^*) \leq \left(1 - \frac{m}{L}\right) [f(x^0) - f(x^*)] = 0. $$We converge in one step! And that holds for any starting point $x^0$.
Let's try this!
gd(f, grad_f, 0, alpha=0.5, niters=1)
(1.0, 10.0)
Let's try a different starting point.
gd(f, grad_f, 100, alpha=0.5, niters=1)
(1.0, 10.0)
We return to logistic regression, which we alluded to in the motivating example of this chapter.
The input data is of the form $\{(\boldsymbol{\alpha}_i, b_i) : i=1,\ldots, n\}$ where $\boldsymbol{\alpha}_i = (\alpha_{i,1}, \ldots, \alpha_{i,d}) \in \mathbb{R}^d$ are the features and $b_i \in \{0,1\}$ is the label. As before we use a matrix representation: $A \in \mathbb{R}^{n \times d}$ has rows $\boldsymbol{\alpha}_i^T$, $i = 1,\ldots, n$ and $\mathbf{b} = (b_1, \ldots, b_n) \in \{0,1\}^n$.
Logistic model. We summarize the logistic regression approach. Our goal is to find a function of the features that approximates the probability of the label $1$. For this purpose, we model the log-odds (or logit function) of the probability of label $1$ as a linear function of the features $\boldsymbol{\alpha} \in \mathbb{R}^d$
$$ \log \frac{p(\boldsymbol{\alpha}; \mathbf{x})}{1-p(\boldsymbol{\alpha}; \mathbf{x})} = \boldsymbol{\alpha}^T \mathbf{x} $$where $\mathbf{x} \in \mathbb{R}^d$ is the vector of coefficients (i.e., parameters). Inverting this expression gives
$$ p(\boldsymbol{\alpha}; \mathbf{x}) = \sigma(\boldsymbol{\alpha}^T \mathbf{x}) $$where the sigmoid function is
$$ \sigma(z) = \frac{1}{1 + e^{-z}} $$for $z \in \mathbb{R}$.
We plot the sigmoid function.
def sigmoid(z):
    return 1/(1+np.exp(-z))
grid = np.linspace(-5, 5, 100)
plt.plot(grid,sigmoid(grid),'r')
plt.show()
We seek to maximize the probability of observing the data (also known as the likelihood function), assuming the labels are independent given the features, which is given by
$$ \mathcal{L}(\mathbf{x}; A, \mathbf{b}) = \prod_{i=1}^n p(\boldsymbol{\alpha}_i; \mathbf{x})^{b_i} (1- p(\boldsymbol{\alpha}_i; \mathbf{x}))^{1-b_i} $$Taking a logarithm, multiplying by $-1/n$ and substituting the sigmoid function, we want to minimize the cross-entropy loss
$$ \ell(\mathbf{x}; A, \mathbf{b}) = \frac{1}{n} \sum_{i=1}^n \left\{- b_i \log(\sigma(\boldsymbol{\alpha}_i^T \mathbf{x})) - (1-b_i) \log(1- \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}))\right\}. $$We used standard properties of the logarithm: for $x, y > 0$, $\log(xy) = \log x + \log y$ and $\log(x^y) = y \log x$.
Hence, we want to solve the minimization problem
$$ \min_{\mathbf{x} \in \mathbb{R}^d} \ell(\mathbf{x}; A, \mathbf{b}). $$We are implicitly using here that the logarithm is a strictly increasing function and therefore does not change the global maximum of a function. Multiplying by $-1$ changes the global maximum into a global minimum.
To use gradient descent, we need the gradient of $\ell$. We use the Chain Rule and first compute the derivative of $\sigma$ which is
$$ \sigma'(z) = \frac{e^{-z}}{(1 + e^{-z})^2} = \frac{1}{1 + e^{-z}}\left(1 - \frac{1}{1 + e^{-z}}\right) = \sigma(z) (1 - \sigma(z)). $$The latter expression is known as the logistic differential equation. It arises in a variety of applications, including the modeling of population dynamics. Here it will be a convenient way to compute the gradient.
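The identity $\sigma'(z) = \sigma(z)(1 - \sigma(z))$ can be checked numerically at a few points (a quick sketch using a centered finite difference):

```python
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

z = np.array([-2.0, 0.0, 1.5])
h = 1e-5
fd = (sigmoid(z + h) - sigmoid(z - h)) / (2 * h)   # numerical sigma'(z)
identity = sigmoid(z) * (1 - sigmoid(z))           # sigma(z)(1 - sigma(z))
print(np.max(np.abs(fd - identity)))               # essentially zero
```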
Observe that, for $\boldsymbol{\alpha} = (\alpha_{1}, \ldots, \alpha_{d}) \in \mathbb{R}^d$, by the Chain Rule
$$ \nabla\sigma(\boldsymbol{\alpha}^T \mathbf{x}) = \sigma'(\boldsymbol{\alpha}^T \mathbf{x}) \nabla (\boldsymbol{\alpha}^T \mathbf{x}) = \sigma'(\boldsymbol{\alpha}^T \mathbf{x}) \boldsymbol{\alpha} $$where, throughout, the gradient is with respect to $\mathbf{x}$.
Alternatively, we can obtain the same formula by applying the single-variable Chain Rule
\begin{align*} \frac{\partial}{\partial x_j} \sigma(\boldsymbol{\alpha}^T \mathbf{x}) &= \sigma'(\boldsymbol{\alpha}^T \mathbf{x}) \frac{\partial}{\partial x_j}(\boldsymbol{\alpha}^T \mathbf{x})\\ &= \sigma'(\boldsymbol{\alpha}^T \mathbf{x}) \frac{\partial}{\partial x_j}\left(\alpha_{j} x_{j} + \sum_{\ell=1, \ell \neq j}^d \alpha_{\ell} x_{\ell}\right)\\ &= \sigma(\boldsymbol{\alpha}^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, \alpha_{j} \end{align*}so that
\begin{align*} \nabla\sigma(\boldsymbol{\alpha}^T \mathbf{x}) &= \left(\sigma(\boldsymbol{\alpha}^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, \alpha_{1}, \ldots, \sigma(\boldsymbol{\alpha}^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, \alpha_{d}\right)\\ &= \sigma(\boldsymbol{\alpha}^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, (\alpha_{1}, \ldots, \alpha_{d})\\ &= \sigma(\boldsymbol{\alpha}^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, \boldsymbol{\alpha}. \end{align*}By another application of the Chain Rule, since $\frac{\mathrm{d}}{\mathrm{d} z} \log z = \frac{1}{z}$,
\begin{align*} &\nabla\ell(\mathbf{x}; A, \mathbf{b})\\ &= \nabla\left[\frac{1}{n} \sum_{i=1}^n \left\{- b_i \log(\sigma(\boldsymbol{\alpha_i}^T \mathbf{x})) - (1-b_i) \log(1- \sigma(\boldsymbol{\alpha_i}^T \mathbf{x}))\right\}\right]\\ &= - \frac{1}{n} \sum_{i=1}^n \frac{b_i}{\sigma(\boldsymbol{\alpha}_i^T \mathbf{x})} \nabla\sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) - \frac{1}{n} \sum_{i=1}^n \frac{1-b_i}{1- \sigma(\boldsymbol{\alpha}_i^T \mathbf{x})} \nabla(1 - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}))\\ &= - \frac{1}{n} \sum_{i=1}^n \frac{b_i}{\sigma(\boldsymbol{\alpha}_i^T \mathbf{x})} \nabla\sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) + \frac{1}{n} \sum_{i=1}^n \frac{1-b_i}{1- \sigma(\boldsymbol{\alpha}_i^T \mathbf{x})} \nabla\sigma(\boldsymbol{\alpha}_i^T \mathbf{x}). \end{align*}Using the expression for the gradient of the sigmoid functions, this is
\begin{align*} &= - \frac{1}{n} \sum_{i=1}^n \frac{b_i}{\sigma(\boldsymbol{\alpha}_i^T \mathbf{x})} \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x})) \,\boldsymbol{\alpha}_i\\ &\quad\quad + \frac{1}{n} \sum_{i=1}^n \frac{1-b_i}{1- \sigma(\boldsymbol{\alpha}_i^T \mathbf{x})} \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x})) \,\boldsymbol{\alpha}_i\\ &= - \frac{1}{n} \sum_{i=1}^n \left( b_i (1 - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x})) - (1-b_i)\sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) \right)\,\boldsymbol{\alpha}_i\\ &= - \frac{1}{n} \sum_{i=1}^n ( b_i - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) ) \,\boldsymbol{\alpha}_i. \end{align*}To implement this formula below, it will be useful to re-write it in terms of the matrix representation $A \in \mathbb{R}^{n \times d}$ (which has rows $\boldsymbol{\alpha}_i^T$, $i = 1,\ldots, n$) and $\mathbf{b} = (b_1, \ldots, b_n) \in \{0,1\}^n$. Let $\bsigma : \mathbb{R}^n \to \mathbb{R}^n$ be the vector-valued function that applies the sigmoid $\sigma$ entry-wise, i.e., $\bsigma(\mathbf{z}) = (\sigma(z_1),\ldots,\sigma(z_n))$ where $\mathbf{z} = (z_1,\ldots,z_n)$. Thinking of $\sum_{i=1}^n (b_i - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}))\,\boldsymbol{\alpha}_i$ as a linear combination of the columns of $A^T$ with coefficients being the entries of the vector $\mathbf{b} - \bsigma(A \mathbf{x})$, we see that
$$ \nabla\ell(\mathbf{x}; A, \mathbf{b}) = - \frac{1}{n} \sum_{i=1}^n ( b_i - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) ) \,\boldsymbol{\alpha}_i = -\frac{1}{n} A^T [\mathbf{b} - \bsigma(A \mathbf{x})]. $$We turn to the Hessian. By symmetry, we can think of the $j$-th column of the Hessian as the gradient of the partial derivative with respect to $x_j$. Hence we start by computing the gradient of the $j$-th entry of the summands in the gradient of $\ell$. We note that, for $\boldsymbol{\alpha} = (\alpha_{1}, \ldots, \alpha_{d}) \in \mathbb{R}^d$,
$$ \nabla [(b - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, \alpha_{j}] = - \nabla [\sigma(\boldsymbol{\alpha}^T \mathbf{x})] \, \alpha_{j} = - \sigma(\boldsymbol{\alpha}^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}^T \mathbf{x}))\, \boldsymbol{\alpha}\alpha_{j}. $$Thus, using the fact that $\boldsymbol{\alpha} \alpha_{j}$ is the $j$-th column of the matrix $\boldsymbol{\alpha} \boldsymbol{\alpha}^T$, we get
$$ \mathbf{H}_{\ell}(\mathbf{x}; A, \mathbf{b}) = \frac{1}{n} \sum_{i=1}^n \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}) (1 - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}))\, \boldsymbol{\alpha}_i \boldsymbol{\alpha}_i^T $$where $\mathbf{H}_{\ell}(\mathbf{x}; A, \mathbf{b})$ indicates the Hessian with respect to the $\mathbf{x}$ variables, for fixed $A, \mathbf{b}$.
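Each term $\sigma(1-\sigma)\, \boldsymbol{\alpha}_i \boldsymbol{\alpha}_i^T$ is positive semidefinite, so the Hessian is positive semidefinite and $\ell$ is convex. A sketch verifying this on random data (the vectorized expression `(A.T * w) @ A` computes $\sum_i w_i \boldsymbol{\alpha}_i \boldsymbol{\alpha}_i^T$):

```python
import numpy as np

rng = np.random.default_rng(0)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

n, d = 50, 3
A = rng.normal(size=(n, d))                  # random features
x = rng.normal(size=d)                       # random parameter vector

w = sigmoid(A @ x) * (1 - sigmoid(A @ x))    # weights sigma(1 - sigma), shape (n,)
H = (A.T * w) @ A / n                        # Hessian formula above
eigvals = np.linalg.eigvalsh(H)
print(eigvals.min() >= -1e-12)               # True: positive semidefinite
```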
For step size $\beta$, one step of gradient descent is therefore
$$ \mathbf{x}^{t+1} = \mathbf{x}^{t} +\beta \frac{1}{n} \sum_{i=1}^n ( b_i - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}^t) ) \,\boldsymbol{\alpha}_i. $$
NUMERICAL CORNER: Before implementing GD for logistic regression, we return to our proof of convergence for smooth functions using a special case. We illustrate it on a random dataset. The functions $\hat{f}$, $\mathcal{L}$ and $\frac{\partial}{\partial x}\mathcal{L}$ are defined next.
def fhat(x,a):
    return 1 / ( 1 + np.exp(-np.outer(x,a)) )

def loss(x,a,b):
    return np.mean(-b*np.log(fhat(x,a)) - (1 - b)*np.log(1 - fhat(x,a)), axis=1)

def grad(x,a,b):
    return -np.mean((b - fhat(x,a))*a, axis=1)
n = 10000
a = 2*rng.uniform(0,1,n) - 1
b = rng.integers(2, size=n)
x = np.linspace(-1,1,100)
plt.plot(x, loss(x,a,b), label='loss')
plt.legend()
plt.show()
We plot next the upper and lower bounds in the Quadratic Bound for Smooth Functions around $x = x_0$. It turns out we can take $L=1$: since all features lie in $[-1,1]$ and $\sigma(1-\sigma) \leq 1/4$, the second derivative of the loss is at most $1/4 \leq 1$. Observe that minimizing the upper quadratic bound leads to a decrease in $\mathcal{L}$.
x0 = -0.3
x = np.linspace(x0-0.05,x0+0.05,100)
upper = loss(x0,a,b) + (x - x0)*grad(x0,a,b) + (1/2)*(x - x0)**2 # upper approximation
lower = loss(x0,a,b) + (x - x0)*grad(x0,a,b) - (1/2)*(x - x0)**2 # lower approximation
plt.plot(x, loss(x,a,b), label='loss')
plt.plot(x, upper, label='upper')
plt.plot(x, lower, label='lower')
plt.legend()
plt.show()
We modify our implementation of gradient descent to take a dataset as input. (That will also be useful to generalize to so-called stochastic gradient descent; see below.) Recall that to run gradient descent, we first implement a function computing a descent update. It takes as input a function grad_fn computing the gradient, as well as a current iterate and a step size. We now also feed a dataset as additional input.
def desc_update_for_logreg(grad_fn, A, b, curr_x, beta):
    gradient = grad_fn(curr_x, A, b)
    return curr_x - beta*gradient
We are ready to implement GD. Our function takes as input a function loss_fn computing the objective, a function grad_fn computing the gradient, the dataset A and b, and an initial guess init_x. Optional parameters are the step size and the number of iterations.
def gd_for_logreg(loss_fn, grad_fn, A, b, init_x, beta=1e-3, niters=int(1e5)):
    # initialization
    curr_x = init_x
    # until the maximum number of iterations
    for _ in range(niters):
        curr_x = desc_update_for_logreg(grad_fn, A, b, curr_x, beta)
    return curr_x
To implement loss_fn and grad_fn, we define the sigmoid as above. Below, pred_fn is $\bsigma(A \mathbf{x})$. Here we write the loss function as
$$ \ell(\mathbf{x}; A, \mathbf{b}) = \mathrm{mean}\left(- \mathbf{b} \odot \boldsymbol{\log}(\bsigma(A \mathbf{x})) - (\mathbf{1} - \mathbf{b}) \odot \boldsymbol{\log}(\mathbf{1} - \bsigma(A \mathbf{x}))\right) $$
where $\odot$ is the Hadamard product, or element-wise product (for example $\mathbf{u} \odot \mathbf{v} = (u_1 v_1, \ldots,u_n v_n)^T$), the logarithm (denoted in bold) is applied element-wise and $\mathrm{mean}(\mathbf{z})$ is the mean of the entries of $\mathbf{z}$ (i.e., $\mathrm{mean}(\mathbf{z}) = n^{-1} \sum_{i=1}^n z_i$).
def pred_fn(x, A):
    return sigmoid(A @ x)

def loss_fn(x, A, b):
    return np.mean(-b*np.log(pred_fn(x, A)) - (1 - b)*np.log(1 - pred_fn(x, A)))

def grad_fn(x, A, b):
    return -A.T @ (b - pred_fn(x, A))/len(b)
We can choose a step size based on the smoothness of the objective as above. Recall that numpy.linalg.norm computes the Frobenius norm by default.
def stepsize_for_logreg(A, b):
    L = LA.norm(A)**2 / len(b)
    return 1/L
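A brief justification for this choice (stated here without proof): since $\sigma(1-\sigma) \leq 1/4$, the Hessian satisfies $\mathbf{H}_\ell \preceq \frac{1}{4n} A^T A$, whose largest eigenvalue is at most $\frac{1}{4n}\|A\|_F^2 \leq \frac{1}{n}\|A\|_F^2 = L$. A numerical spot check on random data:

```python
import numpy as np

rng = np.random.default_rng(2)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

n, d = 100, 4
A = rng.normal(size=(n, d))
x = rng.normal(size=d)

w = sigmoid(A @ x) * (1 - sigmoid(A @ x))
H = (A.T * w) @ A / n                       # Hessian of the loss at x
L = np.linalg.norm(A)**2 / n                # constant used in stepsize_for_logreg
print(np.linalg.eigvalsh(H).max() <= L)     # True
```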
LeBron James 2017 NBA Playoffs dataset

We start with a simple dataset from UC Berkeley's DS100 course. The file lebron.csv is available here. Quoting a previous version of the course's textbook:
In basketball, players score by shooting a ball through a hoop. One such player, LeBron James, is widely considered one of the best basketball players ever for his incredible ability to score. LeBron plays in the National Basketball Association (NBA), the United States's premier basketball league. We've collected a dataset of all of LeBron's attempts in the 2017 NBA Playoff Games using the NBA statistics website (https://stats.nba.com/).
We first load the data and look at its summary.
df = pd.read_csv('lebron.csv')
df.head()
| | game_date | minute | opponent | action_type | shot_type | shot_distance | shot_made |
|---|---|---|---|---|---|---|---|
| 0 | 20170415 | 10 | IND | Driving Layup Shot | 2PT Field Goal | 0 | 0 |
| 1 | 20170415 | 11 | IND | Driving Layup Shot | 2PT Field Goal | 0 | 1 |
| 2 | 20170415 | 14 | IND | Layup Shot | 2PT Field Goal | 0 | 1 |
| 3 | 20170415 | 15 | IND | Driving Layup Shot | 2PT Field Goal | 0 | 1 |
| 4 | 20170415 | 18 | IND | Alley Oop Dunk Shot | 2PT Field Goal | 0 | 1 |
df.describe()
| | game_date | minute | shot_distance | shot_made |
|---|---|---|---|---|
| count | 3.840000e+02 | 384.00000 | 384.000000 | 384.000000 |
| mean | 2.017052e+07 | 24.40625 | 10.695312 | 0.565104 |
| std | 6.948501e+01 | 13.67304 | 10.547586 | 0.496390 |
| min | 2.017042e+07 | 1.00000 | 0.000000 | 0.000000 |
| 25% | 2.017050e+07 | 13.00000 | 1.000000 | 0.000000 |
| 50% | 2.017052e+07 | 25.00000 | 6.500000 | 1.000000 |
| 75% | 2.017060e+07 | 35.00000 | 23.000000 | 1.000000 |
| max | 2.017061e+07 | 48.00000 | 31.000000 | 1.000000 |
The two columns we will be interested in are shot_distance (LeBron's distance from the basket when the shot was attempted, in feet) and shot_made (0 if the shot missed, 1 if the shot went in). As the summary table above indicates, the average distance was 10.6953 and the frequency of shots made was 0.565104. We extract those two columns and display them on a scatter plot.
feature = df['shot_distance']
label = df['shot_made']
plt.scatter(feature, label, alpha=0.2)
plt.show()
As you can see, this kind of data is hard to visualize because of the superposition of points with the same $x$ and $y$-values. One trick is to jitter the $y$'s a little bit by adding Gaussian noise. We do this next and plot again.
label_jitter = label + 0.05*rng.normal(0,1,len(label))
plt.scatter(feature, label_jitter, alpha=0.2)
plt.show()
We apply GD to logistic regression. We first construct the data matrices $A$ and $\mathbf{b}$. To allow an affine function of the features, we add a column of $1$'s as we have done before.
A = np.stack((np.ones(len(label)),feature),axis=-1)
b = label
We run GD starting from $(0,0)$ with a step size computed from the smoothness of the objective as above.
stepsize = stepsize_for_logreg(A, b)
print(stepsize)
0.0044179063265799194
init_x = np.zeros(A.shape[1])
best_x = gd_for_logreg(loss_fn, grad_fn, A, b, init_x, beta=stepsize)
print(best_x)
[ 0.90959003 -0.05890828]
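The helper functions `sigmoid`, `loss_fn`, `grad_fn`, `desc_update_for_logreg`, `stepsize_for_logreg`, and `gd_for_logreg` were defined earlier in the chapter. For readers jumping in here, a minimal numpy sketch consistent with how they are called in this section is below; this is a reconstruction, and the exact implementations in the notes (in particular the smoothness constant behind `stepsize_for_logreg`) may differ.

```python
import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def pred_fn(x, A):
    # predicted probabilities sigma(alpha_i^T x), one per sample
    return sigmoid(A @ x)

def loss_fn(x, A, b):
    # average cross-entropy loss
    p = pred_fn(x, A)
    return -np.mean(b * np.log(p) + (1 - b) * np.log(1 - p))

def grad_fn(x, A, b):
    # gradient of the average cross-entropy loss
    return -A.T @ (b - pred_fn(x, A)) / len(b)

def desc_update_for_logreg(grad_fn, A, b, curr_x, beta):
    # one step of gradient descent
    return curr_x - beta * grad_fn(curr_x, A, b)

def stepsize_for_logreg(A, b):
    # step size from a smoothness bound:
    # the reciprocal of the average squared row norm of A
    return len(b) / np.linalg.norm(A) ** 2

def gd_for_logreg(loss_fn, grad_fn, A, b, init_x, beta=1e-3, niters=int(1e5)):
    curr_x = init_x
    for _ in range(niters):
        curr_x = desc_update_for_logreg(grad_fn, A, b, curr_x, beta)
    return curr_x

# sanity check on synthetic data: the loss should decrease from its starting value
rng_check = np.random.default_rng(0)
A_toy = np.column_stack((np.ones(100), rng_check.normal(size=100)))
b_toy = (A_toy[:, 1] > 0).astype(float)
x_hat = gd_for_logreg(loss_fn, grad_fn, A_toy, b_toy, np.zeros(2),
                      beta=stepsize_for_logreg(A_toy, b_toy), niters=5000)
```

The synthetic check at the end only verifies that gradient descent reduces the loss; it is not part of the LeBron example.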
Finally we plot the results.
grid = np.linspace(np.min(feature), np.max(feature), 100)
feature_grid = np.stack((np.ones(len(grid)),grid),axis=-1)
predict_grid = sigmoid(feature_grid @ best_x)
plt.scatter(feature, label_jitter, alpha=0.2)
plt.plot(grid,predict_grid,'r')
plt.show()
Stochastic gradient descent In stochastic gradient descent (SGD), a variant of gradient descent, we pick a sample $I_t$ uniformly at random in $\{1,\ldots,n\}$ and update as follows
$$ \mathbf{x}^{t+1} = \mathbf{x}^{t} +\beta \, ( b_{I_t} - \sigma(\boldsymbol{\alpha}_{I_t}^T \mathbf{x}^t) ) \, \boldsymbol{\alpha}_{I_t}. $$

For the mini-batch version of SGD, we pick a random sub-sample $\mathcal{B}_t \subseteq \{1,\ldots,n\}$ of size $B$ and update as follows

$$ \mathbf{x}^{t+1} = \mathbf{x}^{t} +\beta \frac{1}{B} \sum_{i\in \mathcal{B}_t} ( b_i - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}^t) ) \,\boldsymbol{\alpha}_i. $$

The key observation about the two stochastic updates above is that, in expectation, they perform a step of gradient descent. That turns out to be enough for convergence, and it has significant computational advantages.
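The key observation can be checked numerically: averaging the single-sample direction $(b_i - \sigma(\boldsymbol{\alpha}_i^T \mathbf{x}))\,\boldsymbol{\alpha}_i$ over all samples recovers the full-gradient direction exactly. A small numpy illustration on made-up data (all names below are new to this snippet):

```python
import numpy as np

rng_demo = np.random.default_rng(0)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# made-up data: n samples with d features, binary labels
n, d = 100, 3
A_demo = rng_demo.normal(size=(n, d))
b_demo = rng_demo.integers(0, 2, size=n).astype(float)
x_demo = rng_demo.normal(size=d)

# full-gradient direction: (1/n) sum_i (b_i - sigma(alpha_i^T x)) alpha_i
full_dir = A_demo.T @ (b_demo - sigmoid(A_demo @ x_demo)) / n

# single-sample directions (b_i - sigma(alpha_i^T x)) alpha_i, one per row;
# their average over a uniformly random index equals the full-gradient direction
single_dirs = (b_demo - sigmoid(A_demo @ x_demo))[:, None] * A_demo
print(np.allclose(single_dirs.mean(axis=0), full_dir))  # True
```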
The only modification needed to the code is to pick a random mini-batch, which is then fed to the descent update subroutine in place of the full dataset.
def sgd_for_logreg(loss_fn, grad_fn, A, b,
                   init_x, beta=1e-3, niters=int(1e5), batch=40):
    # initialization
    curr_x = init_x
    nsamples = len(b)
    # until the maximum number of iterations
    for _ in range(niters):
        # pick a random mini-batch and take a descent step on it
        I = rng.integers(nsamples, size=batch)
        curr_x = desc_update_for_logreg(grad_fn, A[I,:], b[I], curr_x, beta)
    return curr_x
South African Heart Disease dataset We analyze a dataset from [ESL], which can be downloaded here. Quoting [ESL, Section 4.4.2]
The data [...] are a subset of the Coronary Risk-Factor Study (CORIS) baseline survey, carried out in three rural areas of the Western Cape, South Africa (Rousseauw et al., 1983). The aim of the study was to establish the intensity of ischemic heart disease risk factors in that high-incidence region. The data represent white males between 15 and 64, and the response variable is the presence or absence of myocardial infarction (MI) at the time of the survey (the overall prevalence of MI was 5.1% in this region). There are 160 cases in our data set, and a sample of 302 controls. These data are described in more detail in Hastie and Tibshirani (1987).
We load the data, which we slightly reformatted, and look at a summary.
df = pd.read_csv('SAHeart.csv')
df.head()
| | sbp | tobacco | ldl | adiposity | typea | obesity | alcohol | age | chd |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 160.0 | 12.00 | 5.73 | 23.11 | 49.0 | 25.30 | 97.20 | 52.0 | 1.0 |
| 1 | 144.0 | 0.01 | 4.41 | 28.61 | 55.0 | 28.87 | 2.06 | 63.0 | 1.0 |
| 2 | 118.0 | 0.08 | 3.48 | 32.28 | 52.0 | 29.14 | 3.81 | 46.0 | 0.0 |
| 3 | 170.0 | 7.50 | 6.41 | 38.03 | 51.0 | 31.99 | 24.26 | 58.0 | 1.0 |
| 4 | 134.0 | 13.60 | 3.50 | 27.78 | 60.0 | 25.99 | 57.34 | 49.0 | 1.0 |
df.describe()
| | sbp | tobacco | ldl | adiposity | typea | obesity | alcohol | age | chd |
|---|---|---|---|---|---|---|---|---|---|
| count | 462.000000 | 462.000000 | 462.000000 | 462.000000 | 462.000000 | 462.000000 | 462.000000 | 462.000000 | 462.000000 |
| mean | 138.326840 | 3.635649 | 4.740325 | 25.406732 | 53.103896 | 26.044113 | 17.044394 | 42.816017 | 0.346320 |
| std | 20.496317 | 4.593024 | 2.070909 | 7.780699 | 9.817534 | 4.213680 | 24.481059 | 14.608956 | 0.476313 |
| min | 101.000000 | 0.000000 | 0.980000 | 6.740000 | 13.000000 | 14.700000 | 0.000000 | 15.000000 | 0.000000 |
| 25% | 124.000000 | 0.052500 | 3.282500 | 19.775000 | 47.000000 | 22.985000 | 0.510000 | 31.000000 | 0.000000 |
| 50% | 134.000000 | 2.000000 | 4.340000 | 26.115000 | 53.000000 | 25.805000 | 7.510000 | 45.000000 | 0.000000 |
| 75% | 148.000000 | 5.500000 | 5.790000 | 31.227500 | 60.000000 | 28.497500 | 23.892500 | 55.000000 | 1.000000 |
| max | 218.000000 | 31.200000 | 15.330000 | 42.490000 | 78.000000 | 46.580000 | 147.190000 | 64.000000 | 1.000000 |
Our goal is to predict `chd`, which stands for coronary heart disease, based on the other variables (which are briefly described here). We use logistic regression again.
We first construct the data matrices. We only use three of the predictors, as convergence with all of them is quite slow. Try it for yourself!
feature = df[['tobacco', 'ldl', 'age']].to_numpy()
print(feature)
[[1.200e+01 5.730e+00 5.200e+01]
 [1.000e-02 4.410e+00 6.300e+01]
 [8.000e-02 3.480e+00 4.600e+01]
 ...
 [3.000e+00 1.590e+00 5.500e+01]
 [5.400e+00 1.161e+01 4.000e+01]
 [0.000e+00 4.820e+00 4.600e+01]]
label = df['chd'].to_numpy()
A = np.concatenate((np.ones((len(label),1)),feature),axis=1)
print(A)
[[1.000e+00 1.200e+01 5.730e+00 5.200e+01]
 [1.000e+00 1.000e-02 4.410e+00 6.300e+01]
 [1.000e+00 8.000e-02 3.480e+00 4.600e+01]
 ...
 [1.000e+00 3.000e+00 1.590e+00 5.500e+01]
 [1.000e+00 5.400e+00 1.161e+01 4.000e+01]
 [1.000e+00 0.000e+00 4.820e+00 4.600e+01]]
b = label
We use the same functions `loss_fn` and `grad_fn`, which were written for general logistic regression problems.
init_x = np.zeros(A.shape[1])
stepsize = stepsize_for_logreg(A, b)
print(stepsize)
0.00047434072581205094
best_x = gd_for_logreg(loss_fn, grad_fn, A, b,
                       init_x, beta=stepsize, niters=1000000)
print(best_x)
[-4.01602283 0.07651133 0.1856912 0.04802978]
The outcome is harder to visualize. To get a sense of how accurate the result is, we compare our predictions to the true labels. By prediction, we mean that we predict label $1$ whenever $\sigma(\boldsymbol{\alpha}^T \mathbf{x}) > 1/2$, and label $0$ otherwise. We try this on the training set. (A better approach would be to split the data into training and testing sets, but we will not do this here.)
def logis_acc(x, A, b):
    # fraction of predictions that match the true labels
    return np.sum((pred_fn(x, A) > 0.5) == b)/len(b)
logis_acc(best_x, A, b)
0.7251082251082251
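For context, the summary table above shows that about $34.6\%$ of the observations are positive, so always predicting the majority class (no heart disease) would already be correct about $65.4\%$ of the time. A quick sanity check of this baseline (the value of `chd_mean` is copied from `df.describe()` above):

```python
# fraction of positive chd labels, taken from the df.describe() summary above
chd_mean = 0.346320
# accuracy of always predicting the more common label
majority_baseline = max(chd_mean, 1 - chd_mean)
print(majority_baseline)  # about 0.654
```

So the logistic regression fit improves on this trivial baseline, but only modestly.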
We also try mini-batch stochastic gradient descent (SGD).
init_x = np.zeros(A.shape[1])
best_x = sgd_for_logreg(loss_fn, grad_fn, A, b,
                        init_x, beta=stepsize, niters=1000000)
print(best_x)
[-4.02241376 0.07713229 0.18654377 0.04636768]
logis_acc(best_x, A, b)
0.7186147186147186
Rather than explicitly specifying the gradient function, we could use PyTorch to compute it automatically. This is done next. Note that the descent update is done within `with torch.no_grad()`, which ensures that the update operation itself is not tracked for gradient computation. Here the input `x0` as well as the output `xk.numpy(force=True)` are Numpy arrays. The method `numpy()` converts a PyTorch tensor to a Numpy array (see the documentation for an explanation of the `force=True` option).
def gd_with_ad(f, x0, alpha=1e-3, niters=int(1e6)):
    xk = torch.tensor(x0,
                      requires_grad=True,
                      dtype=torch.float)
    for _ in range(niters):
        # Compute the function value and its gradient
        value = f(xk)
        value.backward()
        # Perform a gradient descent step
        with torch.no_grad():  # Temporarily set all requires_grad flags to False
            xk -= alpha * xk.grad
        # Zero the gradients for the next iteration
        xk.grad.zero_()
    return xk.numpy(force=True), f(xk).item()
We revisit a previous example.
def f(x):
    return x**3
xgrid = np.linspace(-2,2,100)
plt.plot(xgrid, f(xgrid), label='f')
plt.ylim((-10,10))
plt.legend()
plt.show()
gd_with_ad(f, 2, niters=int(1e4))
(array(0.03277362, dtype=float32), 3.5202472645323724e-05)
gd_with_ad(f, -2, niters=100)
(array(-4.9335055, dtype=float32), -120.07894897460938)
The `Advertising` dataset and the least-squares solution We return to the `Advertising` dataset.
df = pd.read_csv('advertising.csv')
df.head()
| | TV | radio | newspaper | sales |
|---|---|---|---|---|
| 0 | 230.1 | 37.8 | 69.2 | 22.1 |
| 1 | 44.5 | 39.3 | 45.1 | 10.4 |
| 2 | 17.2 | 45.9 | 69.3 | 9.3 |
| 3 | 151.5 | 41.3 | 58.5 | 18.5 |
| 4 | 180.8 | 10.8 | 58.4 | 12.9 |
We first compute the solution using the least-squares approach we detailed previously. We use `numpy.column_stack` to add a column of ones to the feature vectors.
TV = df['TV'].to_numpy()
radio = df['radio'].to_numpy()
newspaper = df['newspaper'].to_numpy()
sales = df['sales'].to_numpy()
features = np.stack((TV, radio, newspaper), axis=-1)
A = np.column_stack((np.ones(len(sales)), features))
coeff = mmids.ls_by_qr(A, sales)
print(coeff)
[ 2.93888937e+00 4.57646455e-02 1.88530017e-01 -1.03749304e-03]
np.mean((A @ coeff - sales)**2)
2.784126314510936
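The function `mmids.ls_by_qr` was introduced in an earlier chapter. As a reminder, a minimal sketch of a QR-based least-squares solver (the version in the `mmids` module may differ in details) is:

```python
import numpy as np

def ls_by_qr(A, b):
    # solve the least-squares problem min ||Ax - b||^2 via a QR decomposition:
    # with A = QR (reduced form), the solution satisfies R x = Q^T b
    Q, R = np.linalg.qr(A)
    return np.linalg.solve(R, Q.T @ b)

# sanity check against numpy's built-in least-squares solver
rng_check = np.random.default_rng(0)
A_check = rng_check.normal(size=(30, 4))
b_check = rng_check.normal(size=30)
print(np.allclose(ls_by_qr(A_check, b_check),
                  np.linalg.lstsq(A_check, b_check, rcond=None)[0]))  # True
```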
Solving the problem using PyTorch We will be using PyTorch to implement the previous method. We first convert the data into PyTorch tensors. We then use `torch.utils.data.TensorDataset` to create the dataset. Finally, `torch.utils.data.DataLoader` provides the utilities to load the data in batches for training. We take mini-batches of size `BATCH_SIZE = 64` and we apply a random permutation of the samples on every pass (with the option `shuffle=True`).
# Convert data to PyTorch tensors
features_tensor = torch.tensor(features, dtype=torch.float32)
sales_tensor = torch.tensor(sales, dtype=torch.float32).view(-1, 1)

# Create a dataset and dataloader for training
BATCH_SIZE = 64
train_dataset = TensorDataset(features_tensor, sales_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
Now we construct our model. It is simply an affine map from $\mathbb{R}^3$ to $\mathbb{R}$. Note that there is no need to pre-process the inputs by adding $1$s. A constant term (or "bias variable") is automatically added by PyTorch (unless one chooses the option `bias=False`).
# Define the model using nn.Sequential
model = nn.Sequential(
    nn.Linear(3, 1)  # 3 input features, 1 output value
)
Finally, we are ready to run an optimization method of our choice on the loss function; both are specified next. There are many optimizers available. (See this post for a brief explanation of many common optimizers.) Here we use SGD as the optimizer, and the loss function is the MSE. A quick tutorial is here.
Choosing the right number of passes (i.e., epochs) through the data requires some experimenting. Here $10^4$ suffices. But in the interest of time, we will run it only for $100$ epochs. As you will see from the results, this is not quite enough. On each pass, we compute the output of the current model, use `backward()` to obtain the gradient, and then perform a descent update with `step()`. We also have to reset the gradients first (otherwise they accumulate by default).
# Compile the model: define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=1e-5)

# Train the model
epochs = 100
for epoch in range(epochs):
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")
Epoch 10/100, Loss: 61.717803955078125
Epoch 20/100, Loss: 74.88373565673828
Epoch 30/100, Loss: 3.718038558959961
Epoch 40/100, Loss: 7.307799816131592
Epoch 50/100, Loss: 13.322959899902344
Epoch 60/100, Loss: 4.912189960479736
Epoch 70/100, Loss: 4.8989787101745605
Epoch 80/100, Loss: 8.02322769165039
Epoch 90/100, Loss: 5.188374042510986
Epoch 100/100, Loss: 2.5458970069885254
The final parameters and loss are:
# Get and print the model weights and bias
weights = model[0].weight.detach().numpy()
bias = model[0].bias.detach().numpy()
print("Weights:", weights)
print("Bias:", bias)
Weights: [[ 0.04833269 0.27859896 -0.01787573]]
Bias: [0.03412555]
# Evaluate the model
model.eval()
with torch.no_grad():
    total_loss = 0
    for inputs, targets in train_loader:
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        total_loss += loss.item()
print(f"Mean Squared Error on Training Set: {total_loss / len(train_loader)}")
Mean Squared Error on Training Set: 4.78020042181015
Figure: MNIST sample images (Source)
$\bowtie$
We first load the data. As before, the training dataset is a tensor -- think matrix with $3$ indices. One index runs through the $60,000$ training images, while the other two indices run through the horizontal and vertical pixel axes of each image. Here each image is $28 \times 28$. The training labels are between $0$ and $9$.
# Load and normalize the MNIST dataset
train_dataset = datasets.MNIST(root='./data',
                               train=True,
                               download=True,
                               transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='./data',
                              train=False,
                              download=True,
                              transform=transforms.ToTensor())

BATCH_SIZE = 32
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=BATCH_SIZE,
                                          shuffle=False)
Implementation We implement multinomial logistic regression to learn a classifier for the MNIST data. We first check for the availability of GPUs.
# Check for GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)
Using device: cpu
In PyTorch, composition of functions can be achieved with `torch.nn.Sequential`. Our model is:
# Define the model using nn.Sequential and move it to the device (GPU if available)
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 10)
).to(device)
The `torch.nn.Flatten` layer turns each input image into a vector of size $784$ (where $784 = 28^2$ is the number of pixels in each image). The final output is $10$-dimensional.
Here we use the `torch.optim.Adam` optimizer (you can try SGD, but it is slow). The loss function is the cross-entropy, as implemented by `torch.nn.CrossEntropyLoss`.
# Compile the model: define loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
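Adam maintains exponentially weighted running averages of the gradient and of its entrywise square, and scales each coordinate's step by the inverse square root of the latter. A minimal numpy sketch of the update rule (the hyperparameter names mirror PyTorch's defaults `lr=1e-3`, `betas=(0.9, 0.999)`, `eps=1e-8`; the toy run below uses a larger `lr` so it converges in few iterations):

```python
import numpy as np

def adam(grad_fn, x0, lr=0.01, beta1=0.9, beta2=0.999, eps=1e-8, niters=2000):
    x = np.array(x0, dtype=float)
    m = np.zeros_like(x)  # running average of the gradient
    v = np.zeros_like(x)  # running average of the squared gradient (entrywise)
    for t in range(1, niters + 1):
        g = grad_fn(x)
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * g ** 2
        m_hat = m / (1 - beta1 ** t)  # bias correction for the zero initialization
        v_hat = v / (1 - beta2 ** t)
        x -= lr * m_hat / (np.sqrt(v_hat) + eps)
    return x

# minimize f(x) = ||x - 1||^2, whose gradient is 2(x - 1)
x_min = adam(lambda x: 2 * (x - np.ones(2)), np.zeros(2))
print(x_min)
```

This is only a sketch of the update rule; PyTorch's implementation handles tensors of parameters, optional weight decay, and other variants.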
In the interest of time, we train for 3 epochs only. An epoch is one pass through the training data in which every sample is visited once (in a randomly shuffled order).
# Training function
def train(dataloader, model, loss_fn, optimizer, device):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Training loop
epochs = 3  # Adjust the number of epochs as needed
for epoch in range(epochs):
    train(train_loader, model, loss_fn, optimizer, device)
    if (epoch+1) % 1 == 0:
        print(f"Epoch {epoch+1}/{epochs}")
Epoch 1/3
Epoch 2/3
Epoch 3/3
Because of the risk of overfitting, we use the test images to assess the performance of the final classifier.
# Evaluation function
def test(dataloader, model, device):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(dim=1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    accuracy = correct / size
    print(f"Test error: {(100*accuracy):>0.1f}% accuracy")
# Evaluate the model
test(test_loader, model, device)
Test error: 92.3% accuracy
To make a prediction, we take a `torch.nn.functional.softmax` of the output of our model. It transforms the output into a probability for each label. The softmax is implicitly included in the cross-entropy loss, but is not actually part of the trained model. (Note that the softmax itself has no parameters.)
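As a minimal illustration of what the softmax computes, here is a numpy sketch applied to a made-up vector of raw model outputs (logits):

```python
import numpy as np

def softmax(z):
    # subtract the max for numerical stability; the result is unchanged
    e = np.exp(z - np.max(z))
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.1])
probs = softmax(logits)
print(np.round(probs, 4))                 # entries are positive and sum to 1
print(probs.argmax() == logits.argmax())  # True: softmax preserves the ranking
```

In particular, taking the argmax of the softmax output gives the same prediction as taking the argmax of the raw logits.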
As an illustration, we do this for each test image. We use `torch.cat` to concatenate a sequence of tensors into a single tensor.
def predict_softmax(dataloader, model, device):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()  # Set the model to evaluation mode
    predictions = []
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            probabilities = F.softmax(pred, dim=1)
            predictions.append(probabilities.cpu())  # Move predictions to CPU
    return torch.cat(predictions, dim=0)
predictions = predict_softmax(test_loader, model, device).numpy()
The result for the first test image is shown below. To make a prediction, we choose the label with the highest probability.
print(predictions[0])
[2.7029557e-05 8.4270707e-10 5.6631994e-05 6.4428351e-03 1.7773822e-06
 3.7374924e-05 5.0613664e-09 9.9277437e-01 5.0100676e-05 6.0985261e-04]
predictions[0].argmax(0)
7
The truth is:
images, labels = next(iter(test_loader))
images = images.squeeze().numpy()
labels = labels.numpy()
labels[0]
7
Above, `next(iter(test_loader))` loads the first batch of test images. (See here for background on iterators in Python.)
Here's the first one.
# Visualization code for individual image
i = 0
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(predictions[i], labels[i], images[i])
plt.subplot(1,2,2)
plot_value_array(predictions[i], labels[i])
plt.show()
This one is a little less clear.
# Visualization code for individual and multiple images
i = 11
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(predictions[i], labels[i], images[i])
plt.subplot(1,2,2)
plot_value_array(predictions[i], labels[i])
plt.show()
This one is wrong.
# Visualization code for individual and multiple images
i = 8
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(predictions[i], labels[i], images[i])
plt.subplot(1,2,2)
plot_value_array(predictions[i], labels[i])
plt.show()
Implementation We implement a neural network in PyTorch. We use the MNIST dataset again. We have already loaded it.
We construct a three-layer model.
# Define the model using nn.Sequential
model = nn.Sequential(
    nn.Flatten(),            # Flatten the input
    nn.Linear(28 * 28, 32),  # First Linear layer with 32 nodes
    nn.Sigmoid(),            # Sigmoid activation function
    nn.Linear(32, 10)        # Second Linear layer with 10 nodes (output layer)
).to(device)
As we did for multinomial logistic regression, we use the Adam optimizer and the cross-entropy loss.
# Define the loss function and the optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
Again, we train for 3 epochs.
# Training loop
epochs = 3  # Adjust the number of epochs as needed
for epoch in range(epochs):
    train(train_loader, model, loss_fn, optimizer, device)
    if (epoch+1) % 1 == 0:
        print(f"Epoch {epoch+1}/{epochs}")
Epoch 1/3
Epoch 2/3
Epoch 3/3
On the test data, we get:
# Evaluate the model
test(test_loader, model, device)
Test error: 94.3% accuracy
This is a significantly more accurate model than the one we obtained using multinomial logistic regression. One can do even better using a neural network architecture tailored to images, known as a convolutional neural network.