data science

profileRoyal9
regression.pdf

regression

April 1, 2022

[1]: import numpy as np from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error import matplotlib.pyplot as plt

# In[2]:
# Load the CSV into a 2-D float array, skipping the header row.
X = np.genfromtxt(
    'graduate-admission.csv',
    delimiter=',',
    skip_header=1,
)

# Fix the global seed so the in-place row shuffle is reproducible.
np.random.seed(12)
np.random.shuffle(X)

# In[3]:
# Divide X into a training set (first `split` rows) and a test set (the rest).
split = 300
train_rows, test_rows = X[:split], X[split:]

# An index of -1 selects the last column: ":-1" keeps every column except
# the last (the features), while "-1" picks the last column (the target).
X_train, y_train = train_rows[:, :-1], train_rows[:, -1]
X_test, y_test = test_rows[:, :-1], test_rows[:, -1]

# In[4]:
# Display the shapes of the four arrays produced by the split.
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

[4]: ((300, 7), (300,), (100, 7), (100,))

# In[5]:
# Train the model: build a Ridge regressor with default settings, then fit
# it on the training features and targets.
model = Ridge()
model.fit(X_train, y_train)

# In[6]:
# Make predictions for the held-out test features.
y_pred = model.predict(X_test)

# In[7]:
# Stack the predictions next to the true values:
# column 0 holds y_test, column 1 holds y_pred.
output = np.column_stack([y_test, y_pred])

# In[8]:
# For each prediction, find the difference (error) between the true and
# predicted values and square it; then return the average of all the
# squared differences.
mean_squared_error(y_true=y_test, y_pred=y_pred)

[8]: 0.004745184085722603

1

# In[9]:
# Sort the rows of output by column 0 (the true values).
order = np.argsort(output[:, 0])
output = output[order]

# In[10]:
true_sorted = output[:, 0]
pred_sorted = output[:, 1]

# Plot the true values (y_test) as a line.
plt.plot(true_sorted)
# Plot the predicted values (y_pred) as dots.
plt.plot(pred_sorted, '.')

[10]: [<matplotlib.lines.Line2D at 0x1cf182a2af0>]

[ ]:

2