data mining

profileDurga-prasad_01
Tree_Clasifier_Iris_Dataset.py

# -*- coding: utf-8 -*-
"""Fit a decision tree on a single Iris feature and plot its predictions.

The script loads the Iris dataset, keeps one feature column (index 1),
holds out 10 shuffled samples for testing, trains a
``DecisionTreeClassifier`` on the remaining samples and plots the true and
the predicted class labels of the held-out samples.
"""
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, metrics
from sklearn.tree import DecisionTreeClassifier

# Number of samples held out for testing.
N_TEST = 10

# Load the Iris dataset.
irs = datasets.load_iris()

# First feature column across all samples.  Note that ``irs.data[0]`` would
# be the first *sample* (one row holding all of its features), not a feature.
irs_1 = irs.data[:, 0]

# Keep only the feature at column index 1 (the second feature).  np.newaxis
# keeps the result as an (n_samples, 1) column, i.e. the 2-D input that
# fit() / predict() require.
irs_X = irs.data[:, np.newaxis, 1]

# The Iris samples are stored grouped by class, so holding out the last rows
# as-is would evaluate on one class only.  Shuffle the row indices once with
# a fixed seed so the held-out rows are a reproducible random sample.
order = np.random.RandomState(0).permutation(len(irs.target))
train_idx = order[:-N_TEST]  # a[:-k] drops the last k elements
test_idx = order[-N_TEST:]   # a[-k:] keeps only the last k elements
# (for reference: a[:k] keeps the first k elements, a[k:] drops them)

# Split the data into training/testing sets.
irs_X_train = irs_X[train_idx]
irs_X_test = irs_X[test_idx]

# Split the targets into training/testing sets.
irs_y_train = irs.target[train_idx]
irs_y_test = irs.target[test_idx]

# Create the classifier and train it on the training set.
clf = DecisionTreeClassifier()
clf.fit(irs_X_train, irs_y_train)

# Make predictions for the held-out samples.
irs_y_pred = clf.predict(irs_X_test)

# Plot outputs: true labels as black dots, predictions as blue crosses.
# Class labels are discrete, so markers are used instead of a connected
# line (a line through unsorted x values has no meaning here).
plt.scatter(irs_X_test, irs_y_test, color='black')
plt.scatter(irs_X_test, irs_y_pred, color='blue', marker='x', linewidths=2)