data mining

SVM_Clasifiers_Iris_Dataset1.py

Home > Computer Science homework help > data mining

# -*- coding: utf-8 -*-
"""Train and evaluate SVM classifiers on the UCI Iris dataset.

The script downloads the Iris CSV, holds out 20% of the rows for testing,
fits a linear-kernel and a sigmoid-kernel SVM, prints a confusion matrix and
a classification report for each, and scatter-plots the true test labels
against the predictions of the last fitted model.

Earlier experiments (a decision tree on scikit-learn's bundled Iris data, and
polynomial / Gaussian kernels) are kept as disabled triple-quoted blocks.
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets, svm, metrics
from sklearn.model_selection import train_test_split
#from sklearn.linear_model import LinearRegression

# Disabled experiment: decision tree on a single Iris feature.  The block is
# an inert string literal, so none of it executes.
'''
# Load the Iris dataset
irs = datasets.load_iris()
print(irs.DESCR)
print ('The keys:', irs.keys())
print('The features are:', irs.feature_names)
#print('The shape:', irs.data.shape)
#print('The complete data set:', irs.data)
#print('The target:', irs.target)
#print('the mean of all features:', np.mean(irs.data, axis=0)) #axis=0: rows, =1: col

# Use only one feature
irs_1= irs.data[0]
#print('the first feature:', irs_1)
#print('the mean of the first feature is:', np.mean(irs_1, axis=0)) #axis=0: rows, =1: col
#plt.scatter(irs.data[:,0], irs.data[:,1], irs.data[:,2], 'r' )
irs_X= irs.data[:,np.newaxis,1] # extend the 3rd feature it to nX1 column vector
#print('The data shape-col-', irs_X.shape)
#irs_X= irs.data[:,np.newaxis,1] # extend the 2nd feature it to nX1 column vector

# Split the data into training/testing sets
irs_X_train = irs_X[:-20] #remove the last 20 elements
a=np.arange(10)
print(a,' the actula vector')
print(a[:-3], 'removes the last 3 elements')
print(a[-3:], 'keeps only the last 3 elemnts')
print(a[:3], 'keeps the first 3 elements')
print(a[3:], 'removes only the first 3 elemnts')
irs_X_test = irs_X[-20:] #keeps the last 20 elements
#print('The testing set:', irs_X_test)

# Split the targets into training/testing sets
irs_y_train = irs.target[:-20]
irs_y_test = irs.target[-20:]# keep the last 10 samples for testing

# Create Classifier object
clf= DecisionTreeClassifier()

# Train the model using the training sets
clf.fit(irs_X_train, irs_y_train) # trainign step

# Make predictions using the testing set
irs_y_pred = clf.predict(irs_X_test)
#3print('The predicted values:',irs_y_pred)
#print('The target values:',irs_y_test)
'''

# Week 5: SVM Classifiers
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# The CSV is read without a header row, so column names are supplied here.
colnames = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

# Read dataset to pandas dataframe (requires network access to `url`).
irisdata = pd.read_csv(url, names=colnames)

# Features are every column except the label; the label is the 'Class' column.
X = irisdata.drop('Class', axis=1)
y = irisdata['Class']

# Hold out 20% of the rows for testing.  No random_state is set, so the split
# (and therefore the printed scores) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

C = 1.0  # SVM regularization parameter

# Linear kernel
clf2 = svm.SVC(kernel='linear', C=C)
clf2.fit(X_train, y_train)
y_pred = clf2.predict(X_test)
print('The SVM Linear Kernal', metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))

# Disabled experiments: polynomial and Gaussian kernels (inert string literal).
'''
# Polynomial Kernel
clf3= svm.SVC(kernel='poly', degree=5)
clf3.fit(X_train, y_train)
y_pred = clf3.predict(X_test)
print('The SVM polynomial Kernal', metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))

#Gaussian Kernel
clf4= svm.SVC(kernel='rbf' )#, gamma='auto')
clf4.fit(X_train, y_train)
y_pred = clf4.predict(X_test)
print('The SVM Gaussian Kernal', metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))
'''

# Sigmoid kernel
# The model must be fitted and y_pred refreshed *before* the report is printed;
# otherwise the lines below would describe the linear-kernel predictions while
# being labelled as the sigmoid kernel's.
clf5 = svm.SVC(kernel='sigmoid')
clf5.fit(X_train, y_train)
y_pred = clf5.predict(X_test)
print('The SVM Sigmoid Kernal', metrics.confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test, y_pred))

# Plot outputs: true test labels against the most recent predictions (y_pred
# currently holds the sigmoid-kernel output).
plt.figure()
#plt.plot(y_test, y_pred, color='red', linewidth=2)
plt.scatter(y_test, y_pred, color='black')
# Render the figure; without this call nothing is displayed when the file is
# run as a plain script rather than in an interactive/inline plotting session.
plt.show()