# big data
# -*- coding: utf-8 -*-
"""
02/05/2020
@author: ltawalbeh

Fit a one-feature linear regression on scikit-learn's diabetes dataset.

Steps performed, in order:

1. Load the dataset and print its description, keys, feature names, shape,
   full data matrix and target vector.
2. Take the first feature as an (n, 1) column vector for use as the single
   regressor.
3. Print a short demonstration of NumPy slicing on ``np.arange(10)``.
4. Hold out the last ``N_TEST`` samples for testing and train on the rest.
5. Fit ``LinearRegression``, predict on the held-out samples, and print the
   coefficient, the mean squared error and the R^2 score.
6. Scatter the test points and draw the fitted line.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Number of trailing samples held out for testing. Features and targets are
# both split with this one value so the two splits cannot drift apart.
N_TEST = 20

# Load the diabetes dataset and show what it contains.
diabetes = datasets.load_diabetes()
print(diabetes.DESCR)
print('The keys:', diabetes.keys())
print('The features are:', diabetes.feature_names)
print('The shape:', diabetes.data.shape)
print('The complete data set:', diabetes.data)
print('The target:', diabetes.target)

# Use only one feature: column 0, with np.newaxis inserted as the second
# axis so the result is an (n, 1) column vector rather than a flat (n,) array.
diabetes_X = diabetes.data[:, np.newaxis, 0]

# For comparison, column 1 with np.newaxis as the first axis gives a (1, n)
# row vector. It is illustrative only and is not used by the regression below.
diabetes_row = diabetes.data[np.newaxis, :, 1]

# Slicing refresher on a small vector; the same negative-index slices are
# used for the train/test split below.
a = np.arange(10)
print(a, ' the actula vector')
print(a[:-3], 'removes the last 3 elements')
print(a[-3:], 'keeps only the last 3 elemnts')
print(a[:3], 'keeps the first 3 elements')
print(a[3:], 'removes only the first 3 elemnts')

# Split the feature into training/testing sets.
diabetes_X_train = diabetes_X[:-N_TEST]  # everything except the last N_TEST
diabetes_X_test = diabetes_X[-N_TEST:]   # only the last N_TEST
print('The testing set:', diabetes_X_test)

# Split the targets the same way so rows stay aligned with the features.
diabetes_y_train = diabetes.target[:-N_TEST]
diabetes_y_test = diabetes.target[-N_TEST:]

# Create the linear regression object and train it on the training set.
regr = LinearRegression()
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set.
diabetes_y_pred = regr.predict(diabetes_X_test)

# The fitted coefficient(s).
print('Coefficients:', regr.coef_)
# The mean squared error on the held-out samples.
print('Mean squared error: %.3f'
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# The printed label says "Variance score", but the value is r2_score
# (coefficient of determination): 1 is perfect prediction.
print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))

# Plot outputs: held-out points in black, fitted line in blue.
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)