Python Programming - A+ Work Required. Only Experts
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"scania_failures.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"t4NcxAzAzxUf","colab_type":"code","outputId":"0d8056ce-85d5-482a-d2a5-d69f5a2f95f1","executionInfo":{"status":"ok","timestamp":1569337732647,"user_tz":240,"elapsed":18480,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":139}},"source":["import os\n","\n","from google.colab import drive\n","\n","# Mount Google Drive; this prompts for an authorization code.\n","drive.mount('/content/drive')\n","\n","# Work from the course folder so data files can be opened by bare filename,\n","# then echo the directory to confirm the change.\n","os.chdir('/content/drive/My Drive/CNT 4149')\n","os.getcwd()"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at /content/drive\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["'/content/drive/My Drive/CNT 4149'"]},"metadata":{"tags":[]},"execution_count":1}]},{"cell_type":"markdown","metadata":{"id":"PusD5tpk1435","colab_type":"text"},"source":["# HW 2\n","\n","\n","1. Train a neural network to predict the class variable \n","2. Display the learning curve (Accuracy vs num of epochs) for 4 values of learning rate (0.0001, 0.001, 0.01 and 0.1)\n","3. Test your model on the test set and report the accuracy\n","4. 
Display the confusion matrix\n","5. Further optimize the model by changing the number of nodes and layers to obtain the best true positive rate"]},{"cell_type":"code","metadata":{"id":"hEcxWjHaz-V4","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"3b5189ed-2521-4a85-c99e-4e690242c59c","executionInfo":{"status":"ok","timestamp":1569337734832,"user_tz":240,"elapsed":20653,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["import pandas as pd\n","import numpy as np\n","import matplotlib.pyplot as plt\n","from sklearn import preprocessing\n","\n","from keras import models\n","from keras import layers\n","from sklearn.metrics import confusion_matrix\n","\n","%matplotlib inline"],"execution_count":2,"outputs":[{"output_type":"stream","text":["Using TensorFlow backend.\n"],"name":"stderr"}]},{"cell_type":"code","metadata":{"id":"AdbXvmqH0KVR","colab_type":"code","outputId":"0770b67f-2a78-4498-98f0-458c3bd925f6","executionInfo":{"status":"ok","timestamp":1569337739328,"user_tz":240,"elapsed":25139,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":253}},"source":["# The first 20 lines of the file are skipped; the next line is read as the header row.\n","train = pd.read_csv(\"aps_failure_training_set.csv\", skiprows=20)\n","\n","train.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," 
<th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," <th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," <th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>76698</td>\n"," <td>na</td>\n"," <td>2130706438</td>\n"," <td>280</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>37250</td>\n"," <td>1432864</td>\n"," <td>3664156</td>\n"," <td>1007684</td>\n"," <td>25896</td>\n"," <td>0</td>\n"," <td>2551696</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>4933296</td>\n"," <td>3655166</td>\n"," <td>1766008</td>\n"," <td>1132040</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1012</td>\n"," <td>268</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>469014</td>\n"," <td>4239660</td>\n"," <td>...</td>\n"," <td>18</td>\n"," <td>5330690</td>\n"," <td>4732</td>\n"," <td>1126</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>62282</td>\n"," <td>85908</td>\n"," <td>32790</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>202710</td>\n"," <td>37928</td>\n"," <td>14745580</td>\n"," <td>1876644</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2801180</td>\n"," <td>2445.8</td>\n"," <td>2712</td>\n"," <td>965866</td>\n"," <td>1706908</td>\n"," <td>1240520</td>\n"," <td>493384</td>\n"," <td>721044</td>\n"," <td>469792</td>\n"," <td>339156</td>\n"," <td>157956</td>\n"," <td>73224</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>33058</td>\n"," <td>na</td>\n"," <td>0</td>\n"," <td>na</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>18254</td>\n"," <td>653294</td>\n"," <td>1720800</td>\n"," <td>516724</td>\n"," <td>31642</td>\n"," <td>0</td>\n"," <td>1393352</td>\n"," <td>0</td>\n"," <td>68</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2560898</td>\n"," <td>2127150</td>\n"," <td>1084598</td>\n"," <td>338544</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>71510</td>\n"," <td>772720</td>\n"," <td>...</td>\n"," <td>na</td>\n"," <td>na</td>\n"," <td>3312</td>\n"," <td>522</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>33736</td>\n"," <td>36946</td>\n"," <td>5936</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>103330</td>\n"," <td>16254</td>\n"," <td>4510080</td>\n"," <td>868538</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3477820</td>\n"," <td>2211.76</td>\n"," <td>2334</td>\n"," <td>664504</td>\n"," <td>824154</td>\n"," <td>421400</td>\n"," <td>178064</td>\n"," <td>293306</td>\n"," <td>245416</td>\n"," <td>133654</td>\n"," <td>81140</td>\n"," <td>97576</td>\n"," <td>1500</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," <td>41040</td>\n"," <td>na</td>\n"," <td>228</td>\n"," <td>100</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1648</td>\n"," <td>370592</td>\n"," <td>1883374</td>\n"," <td>292936</td>\n"," <td>12016</td>\n"," <td>0</td>\n"," <td>1234132</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2371990</td>\n"," <td>2173634</td>\n"," <td>300796</td>\n"," <td>153698</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>358</td>\n"," <td>110</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>870456</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>2341048</td>\n"," <td>1494</td>\n"," <td>152</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>13876</td>\n"," <td>38182</td>\n"," <td>8138</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>65772</td>\n"," <td>10534</td>\n"," <td>300240</td>\n"," <td>48028</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1040120</td>\n"," <td>1018.64</td>\n"," <td>1020</td>\n"," <td>262032</td>\n"," <td>453378</td>\n"," <td>277378</td>\n"," <td>159812</td>\n"," <td>423992</td>\n"," <td>409564</td>\n"," 
<td>320746</td>\n"," <td>158022</td>\n"," <td>95128</td>\n"," <td>514</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>12</td>\n"," <td>0</td>\n"," <td>70</td>\n"," <td>66</td>\n"," <td>0</td>\n"," <td>10</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>318</td>\n"," <td>2212</td>\n"," <td>3232</td>\n"," <td>1872</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2668</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>642</td>\n"," <td>3894</td>\n"," <td>10184</td>\n"," <td>7554</td>\n"," <td>10764</td>\n"," <td>1014</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>60</td>\n"," <td>6</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>2578</td>\n"," <td>76</td>\n"," <td>62</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>232</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2014</td>\n"," <td>370</td>\n"," <td>48</td>\n"," <td>18</td>\n"," <td>15740</td>\n"," <td>1822</td>\n"," <td>20174</td>\n"," <td>44</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1.08</td>\n"," <td>54</td>\n"," <td>5670</td>\n"," <td>1566</td>\n"," <td>240</td>\n"," <td>46</td>\n"," <td>58</td>\n"," <td>44</td>\n"," <td>10</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>4</td>\n"," <td>32</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>60874</td>\n"," <td>na</td>\n"," <td>1368</td>\n"," <td>458</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>43752</td>\n"," <td>1966618</td>\n"," <td>1800340</td>\n"," <td>131646</td>\n"," <td>4588</td>\n"," <td>0</td>\n"," <td>1974038</td>\n"," <td>0</td>\n"," <td>226</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>3230626</td>\n"," <td>2618878</td>\n"," <td>1058136</td>\n"," <td>551022</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1788</td>\n"," <td>642</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>42124</td>\n"," <td>372236</td>\n"," <td>2128914</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>3590004</td>\n"," <td>2026</td>\n"," <td>444</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>44946</td>\n"," <td>62648</td>\n"," <td>11506</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>149474</td>\n"," <td>35154</td>\n"," <td>457040</td>\n"," <td>80482</td>\n"," <td>98334</td>\n"," <td>27588</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>21173050</td>\n"," <td>1116.06</td>\n"," <td>1176</td>\n"," <td>404740</td>\n"," <td>904230</td>\n"," <td>622012</td>\n"," <td>229790</td>\n"," <td>405298</td>\n"," <td>347188</td>\n"," <td>286954</td>\n"," <td>311560</td>\n"," <td>433954</td>\n"," <td>1218</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ad_000 ... ee_007 ee_008 ee_009 ef_000 eg_000\n","0 neg 76698 na 2130706438 280 ... 157956 73224 0 0 0\n","1 neg 33058 na 0 na ... 81140 97576 1500 0 0\n","2 neg 41040 na 228 100 ... 158022 95128 514 0 0\n","3 neg 12 0 70 66 ... 0 0 0 4 32\n","4 neg 60874 na 1368 458 ... 
311560 433954 1218 0 0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"1hlh1rIP73GE","colab_type":"code","outputId":"b865db52-36af-41a0-eb71-b9aaf8876c00","executionInfo":{"status":"ok","timestamp":1569337739331,"user_tz":240,"elapsed":25135,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":102}},"source":["train.info()"],"execution_count":4,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 60000 entries, 0 to 59999\n","Columns: 171 entries, class to eg_000\n","dtypes: int64(1), object(170)\n","memory usage: 78.3+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"66rNHH8c8HOs","colab_type":"code","outputId":"1f6ec2ce-3cab-4655-d0d2-b11d3701b6cd","executionInfo":{"status":"ok","timestamp":1569337753255,"user_tz":240,"elapsed":39053,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":253}},"source":["# Parse every feature column as numeric; errors='coerce' turns the 'na'\n","# placeholder strings read from the CSV into NaN. The 'class' label is left as-is.\n","# Index.drop lists the feature names without copying the whole frame.\n","feature_cols = train.columns.drop('class')\n","for col in feature_cols:\n","    train[col] = pd.to_numeric(train[col], errors='coerce')\n","\n","train.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," 
<th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," <th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>76698</td>\n"," <td>NaN</td>\n"," <td>2.130706e+09</td>\n"," <td>280.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>37250.0</td>\n"," <td>1432864.0</td>\n"," <td>3664156.0</td>\n"," <td>1007684.0</td>\n"," <td>25896.0</td>\n"," <td>0.0</td>\n"," <td>2551696.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>4933296.0</td>\n"," <td>3655166.0</td>\n"," 
<td>1766008.0</td>\n"," <td>1132040.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1012.0</td>\n"," <td>268.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>469014.0</td>\n"," <td>4239660.0</td>\n"," <td>...</td>\n"," <td>18.0</td>\n"," <td>5330690.0</td>\n"," <td>4732.0</td>\n"," <td>1126.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>62282.0</td>\n"," <td>85908.0</td>\n"," <td>32790.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>202710.0</td>\n"," <td>37928.0</td>\n"," <td>14745580.0</td>\n"," <td>1876644.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2801180.0</td>\n"," <td>2445.80</td>\n"," <td>2712.0</td>\n"," <td>965866.0</td>\n"," <td>1706908.0</td>\n"," <td>1240520.0</td>\n"," <td>493384.0</td>\n"," <td>721044.0</td>\n"," <td>469792.0</td>\n"," <td>339156.0</td>\n"," <td>157956.0</td>\n"," <td>73224.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>33058</td>\n"," <td>NaN</td>\n"," <td>0.000000e+00</td>\n"," <td>NaN</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>18254.0</td>\n"," <td>653294.0</td>\n"," <td>1720800.0</td>\n"," <td>516724.0</td>\n"," <td>31642.0</td>\n"," <td>0.0</td>\n"," <td>1393352.0</td>\n"," <td>0.0</td>\n"," <td>68.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2560898.0</td>\n"," <td>2127150.0</td>\n"," <td>1084598.0</td>\n"," <td>338544.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>71510.0</td>\n"," <td>772720.0</td>\n"," <td>...</td>\n"," 
<td>NaN</td>\n"," <td>NaN</td>\n"," <td>3312.0</td>\n"," <td>522.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>33736.0</td>\n"," <td>36946.0</td>\n"," <td>5936.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>103330.0</td>\n"," <td>16254.0</td>\n"," <td>4510080.0</td>\n"," <td>868538.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3477820.0</td>\n"," <td>2211.76</td>\n"," <td>2334.0</td>\n"," <td>664504.0</td>\n"," <td>824154.0</td>\n"," <td>421400.0</td>\n"," <td>178064.0</td>\n"," <td>293306.0</td>\n"," <td>245416.0</td>\n"," <td>133654.0</td>\n"," <td>81140.0</td>\n"," <td>97576.0</td>\n"," <td>1500.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," <td>41040</td>\n"," <td>NaN</td>\n"," <td>2.280000e+02</td>\n"," <td>100.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1648.0</td>\n"," <td>370592.0</td>\n"," <td>1883374.0</td>\n"," <td>292936.0</td>\n"," <td>12016.0</td>\n"," <td>0.0</td>\n"," <td>1234132.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2371990.0</td>\n"," <td>2173634.0</td>\n"," <td>300796.0</td>\n"," <td>153698.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>358.0</td>\n"," <td>110.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>870456.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>2341048.0</td>\n"," <td>1494.0</td>\n"," <td>152.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>13876.0</td>\n"," <td>38182.0</td>\n"," <td>8138.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," 
<td>65772.0</td>\n"," <td>10534.0</td>\n"," <td>300240.0</td>\n"," <td>48028.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1040120.0</td>\n"," <td>1018.64</td>\n"," <td>1020.0</td>\n"," <td>262032.0</td>\n"," <td>453378.0</td>\n"," <td>277378.0</td>\n"," <td>159812.0</td>\n"," <td>423992.0</td>\n"," <td>409564.0</td>\n"," <td>320746.0</td>\n"," <td>158022.0</td>\n"," <td>95128.0</td>\n"," <td>514.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>12</td>\n"," <td>0.0</td>\n"," <td>7.000000e+01</td>\n"," <td>66.0</td>\n"," <td>0.0</td>\n"," <td>10.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>318.0</td>\n"," <td>2212.0</td>\n"," <td>3232.0</td>\n"," <td>1872.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2668.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>642.0</td>\n"," <td>3894.0</td>\n"," <td>10184.0</td>\n"," <td>7554.0</td>\n"," <td>10764.0</td>\n"," <td>1014.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>60.0</td>\n"," <td>6.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>2578.0</td>\n"," <td>76.0</td>\n"," <td>62.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>232.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2014.0</td>\n"," <td>370.0</td>\n"," <td>48.0</td>\n"," <td>18.0</td>\n"," <td>15740.0</td>\n"," <td>1822.0</td>\n"," <td>20174.0</td>\n"," <td>44.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1.08</td>\n"," <td>54.0</td>\n"," <td>5670.0</td>\n"," <td>1566.0</td>\n"," <td>240.0</td>\n"," <td>46.0</td>\n"," <td>58.0</td>\n"," <td>44.0</td>\n"," <td>10.0</td>\n"," <td>0.0</td>\n"," 
<td>0.0</td>\n"," <td>0.0</td>\n"," <td>4.0</td>\n"," <td>32.0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>60874</td>\n"," <td>NaN</td>\n"," <td>1.368000e+03</td>\n"," <td>458.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>43752.0</td>\n"," <td>1966618.0</td>\n"," <td>1800340.0</td>\n"," <td>131646.0</td>\n"," <td>4588.0</td>\n"," <td>0.0</td>\n"," <td>1974038.0</td>\n"," <td>0.0</td>\n"," <td>226.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3230626.0</td>\n"," <td>2618878.0</td>\n"," <td>1058136.0</td>\n"," <td>551022.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1788.0</td>\n"," <td>642.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>42124.0</td>\n"," <td>372236.0</td>\n"," <td>2128914.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>3590004.0</td>\n"," <td>2026.0</td>\n"," <td>444.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>44946.0</td>\n"," <td>62648.0</td>\n"," <td>11506.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>149474.0</td>\n"," <td>35154.0</td>\n"," <td>457040.0</td>\n"," <td>80482.0</td>\n"," <td>98334.0</td>\n"," <td>27588.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>21173050.0</td>\n"," <td>1116.06</td>\n"," <td>1176.0</td>\n"," <td>404740.0</td>\n"," <td>904230.0</td>\n"," <td>622012.0</td>\n"," <td>229790.0</td>\n"," <td>405298.0</td>\n"," <td>347188.0</td>\n"," <td>286954.0</td>\n"," <td>311560.0</td>\n"," <td>433954.0</td>\n"," <td>1218.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ... ee_008 ee_009 ef_000 eg_000\n","0 neg 76698 NaN 2.130706e+09 ... 
73224.0 0.0 0.0 0.0\n","1 neg 33058 NaN 0.000000e+00 ... 97576.0 1500.0 0.0 0.0\n","2 neg 41040 NaN 2.280000e+02 ... 95128.0 514.0 0.0 0.0\n","3 neg 12 0.0 7.000000e+01 ... 0.0 0.0 4.0 32.0\n","4 neg 60874 NaN 1.368000e+03 ... 433954.0 1218.0 0.0 0.0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"q7i24-Vb-C1p","colab_type":"code","outputId":"d9375183-cef3-467f-84f5-d05a088871cb","executionInfo":{"status":"ok","timestamp":1569337754185,"user_tz":240,"elapsed":39977,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":68}},"source":["# Flag columns where more than 30000 of the 60000 training rows are missing.\n","index = train.isna().sum() > 30000\n","\n","train.columns[index]"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['ab_000', 'bm_000', 'bn_000', 'bo_000', 'bp_000', 'bq_000', 'br_000',\n"," 'cr_000'],\n"," dtype='object')"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"id":"WE_T70K4Fn57","colab_type":"code","colab":{}},"source":["# Drop the columns flagged in `index` by the previous cell rather than retyping\n","# their names, so this list cannot drift from the mask. errors='ignore' makes\n","# re-running this cell a no-op instead of a KeyError.\n","train = train.drop(columns=index[index].index, errors='ignore')\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"yqYul8j4HGC9","colab_type":"code","outputId":"30b7d353-6768-45fc-a5b8-6b4fbfe4e1cf","executionInfo":{"status":"ok","timestamp":1569337754390,"user_tz":240,"elapsed":40172,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":102}},"source":["train.dropna().info()"],"execution_count":8,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 20915 entries, 0 to 59999\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 26.2+ 
MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"MoyKKysAIV17","colab_type":"code","outputId":"6e1b8f74-ce3e-475a-a0cd-07360868757b","executionInfo":{"status":"ok","timestamp":1569337754605,"user_tz":240,"elapsed":40381,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["# Number of 'pos' rows left if every row containing a NaN were dropped.\n","int((train.dropna()['class'] == 'pos').sum())"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["183"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"l4-kcTSjKoLu","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"df183d7b-eee3-4269-b704-1abae320ef04","executionInfo":{"status":"ok","timestamp":1569337754607,"user_tz":240,"elapsed":40380,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["# Number of 'pos' rows in the full training frame.\n","int((train['class'] == 'pos').sum())"],"execution_count":10,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1000"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"zcNQVR4S0QpZ","colab_type":"code","outputId":"2fed5912-a0a8-45a8-e491-d6af16aba138","executionInfo":{"status":"ok","timestamp":1569337757212,"user_tz":240,"elapsed":42979,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":253}},"source":["# Skip the first 20 lines, as for the training file.\n","test = pd.read_csv(\"aps_failure_test_set.csv\", skiprows=20)\n","\n","test.head()"],"execution_count":11,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr 
style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," <th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," <th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>60</td>\n"," <td>0</td>\n"," <td>20</td>\n"," <td>12</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2682</td>\n"," 
<td>4736</td>\n"," <td>3862</td>\n"," <td>1846</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3976</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1520</td>\n"," <td>2374</td>\n"," <td>11516</td>\n"," <td>9480</td>\n"," <td>111258</td>\n"," <td>470</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>58</td>\n"," <td>26</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>6598</td>\n"," <td>70</td>\n"," <td>112</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>340</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1100</td>\n"," <td>574</td>\n"," <td>232</td>\n"," <td>66</td>\n"," <td>780</td>\n"," <td>882</td>\n"," <td>0</td>\n"," <td>4</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>465.5</td>\n"," <td>90</td>\n"," <td>7502</td>\n"," <td>3156</td>\n"," <td>1098</td>\n"," <td>138</td>\n"," <td>412</td>\n"," <td>654</td>\n"," <td>78</td>\n"," <td>88</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>82</td>\n"," <td>0</td>\n"," <td>68</td>\n"," <td>40</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>748</td>\n"," <td>12594</td>\n"," <td>3636</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>5244</td>\n"," <td>0</td>\n"," <td>60</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>23174</td>\n"," <td>18166</td>\n"," <td>23686</td>\n"," <td>1270</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>12</td>\n"," <td>82</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>7918</td>\n"," <td>78</td>\n"," <td>40</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>352</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3996</td>\n"," <td>584</td>\n"," <td>200</td>\n"," <td>62</td>\n"," <td>37580</td>\n"," <td>3756</td>\n"," <td>6368</td>\n"," <td>36</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2.86</td>\n"," <td>102</td>\n"," <td>10040</td>\n"," <td>3310</td>\n"," <td>1068</td>\n"," <td>276</td>\n"," <td>1620</td>\n"," <td>116</td>\n"," <td>86</td>\n"," <td>462</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," <td>66002</td>\n"," <td>2</td>\n"," <td>212</td>\n"," <td>112</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>199486</td>\n"," <td>1358536</td>\n"," <td>1952422</td>\n"," <td>452706</td>\n"," <td>25130</td>\n"," <td>520</td>\n"," <td>0</td>\n"," <td>1891670</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>77898</td>\n"," <td>110548</td>\n"," <td>3605894</td>\n"," <td>3291610</td>\n"," <td>959756</td>\n"," <td>286536</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>106</td>\n"," <td>340</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>24286</td>\n"," <td>681260</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>3613906</td>\n"," <td>4218</td>\n"," <td>692</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>25278</td>\n"," <td>9438</td>\n"," <td>2504</td>\n"," <td>10262714</td>\n"," <td>1278664</td>\n"," <td>109700</td>\n"," <td>19072</td>\n"," <td>9520</td>\n"," <td>4902</td>\n"," <td>4434614</td>\n"," <td>70900</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>26002880</td>\n"," <td>2057.84</td>\n"," <td>2158</td>\n"," <td>396312</td>\n"," <td>538136</td>\n"," <td>495076</td>\n"," 
<td>380368</td>\n"," <td>440134</td>\n"," <td>269556</td>\n"," <td>1315022</td>\n"," <td>153680</td>\n"," <td>516</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>59816</td>\n"," <td>na</td>\n"," <td>1010</td>\n"," <td>936</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>123922</td>\n"," <td>984314</td>\n"," <td>1680050</td>\n"," <td>1135268</td>\n"," <td>92606</td>\n"," <td>14038</td>\n"," <td>1772828</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1116</td>\n"," <td>2372</td>\n"," <td>3546760</td>\n"," <td>3053176</td>\n"," <td>652616</td>\n"," <td>423374</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>7274</td>\n"," <td>0</td>\n"," <td>1622</td>\n"," <td>432</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>6388</td>\n"," <td>1091104</td>\n"," <td>...</td>\n"," <td>46</td>\n"," <td>3413978</td>\n"," <td>2924</td>\n"," <td>414</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>60</td>\n"," <td>38710</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>27740</td>\n"," <td>33354</td>\n"," <td>6330</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>133542</td>\n"," <td>21290</td>\n"," <td>2718360</td>\n"," <td>435370</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1179900</td>\n"," <td>1541.32</td>\n"," <td>1678</td>\n"," <td>659550</td>\n"," <td>691580</td>\n"," <td>540820</td>\n"," <td>243270</td>\n"," <td>483302</td>\n"," <td>485332</td>\n"," <td>431376</td>\n"," <td>210074</td>\n"," <td>281662</td>\n"," <td>3232</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>1814</td>\n"," <td>na</td>\n"," <td>156</td>\n"," <td>140</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>72</td>\n"," <td>17926</td>\n"," <td>82834</td>\n"," 
<td>3114</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>48978</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>97146</td>\n"," <td>89920</td>\n"," <td>12932</td>\n"," <td>5092</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>102</td>\n"," <td>50</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>11544</td>\n"," <td>73570</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>95372</td>\n"," <td>78</td>\n"," <td>36</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>538</td>\n"," <td>360</td>\n"," <td>142</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1822</td>\n"," <td>344</td>\n"," <td>2140</td>\n"," <td>394</td>\n"," <td>13664</td>\n"," <td>110</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>813740</td>\n"," <td>113.86</td>\n"," <td>52</td>\n"," <td>10216</td>\n"," <td>9958</td>\n"," <td>7646</td>\n"," <td>4144</td>\n"," <td>18466</td>\n"," <td>49782</td>\n"," <td>3176</td>\n"," <td>482</td>\n"," <td>76</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ad_000 ... ee_007 ee_008 ee_009 ef_000 eg_000\n","0 neg 60 0 20 12 ... 88 0 0 0 0\n","1 neg 82 0 68 40 ... 462 0 0 0 0\n","2 neg 66002 2 212 112 ... 153680 516 0 0 0\n","3 neg 59816 na 1010 936 ... 210074 281662 3232 0 0\n","4 neg 1814 na 156 140 ... 
482 76 0 0 0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":11}]},{"cell_type":"code","metadata":{"id":"pUo7iz2H1Dvg","colab_type":"code","outputId":"ad85cfd8-330f-40f6-9163-d9d3a8f7ae83","executionInfo":{"status":"ok","timestamp":1569337761266,"user_tz":240,"elapsed":47027,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":253}},"source":["# Coerce every feature column (everything except 'class') to a numeric dtype;\n","# values that cannot be parsed, such as the raw 'na' markers, become NaN.\n","feature_cols = test.columns.drop('class')\n","test[feature_cols] = test[feature_cols].apply(pd.to_numeric, errors='coerce')\n","\n","test.head()"],"execution_count":12,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," <th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n","
<th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>60</td>\n"," <td>0.0</td>\n"," <td>20.0</td>\n"," <td>12.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2682.0</td>\n"," <td>4736.0</td>\n"," <td>3862.0</td>\n"," <td>1846.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3976.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1520.0</td>\n"," <td>2374.0</td>\n"," <td>11516.0</td>\n"," <td>9480.0</td>\n"," <td>111258.0</td>\n"," <td>470.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>58.0</td>\n"," <td>26.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>6598.0</td>\n"," <td>70.0</td>\n"," <td>112.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>340.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1100.0</td>\n"," <td>574.0</td>\n"," <td>232.0</td>\n"," <td>66.0</td>\n"," <td>780.0</td>\n"," <td>882.0</td>\n"," 
<td>0.0</td>\n"," <td>4.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>465.50</td>\n"," <td>90.0</td>\n"," <td>7502.0</td>\n"," <td>3156.0</td>\n"," <td>1098.0</td>\n"," <td>138.0</td>\n"," <td>412.0</td>\n"," <td>654.0</td>\n"," <td>78.0</td>\n"," <td>88.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>82</td>\n"," <td>0.0</td>\n"," <td>68.0</td>\n"," <td>40.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>748.0</td>\n"," <td>12594.0</td>\n"," <td>3636.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>5244.0</td>\n"," <td>0.0</td>\n"," <td>60.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>23174.0</td>\n"," <td>18166.0</td>\n"," <td>23686.0</td>\n"," <td>1270.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>12.0</td>\n"," <td>82.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>7918.0</td>\n"," <td>78.0</td>\n"," <td>40.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>352.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3996.0</td>\n"," <td>584.0</td>\n"," <td>200.0</td>\n"," <td>62.0</td>\n"," <td>37580.0</td>\n"," <td>3756.0</td>\n"," <td>6368.0</td>\n"," <td>36.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2.86</td>\n"," <td>102.0</td>\n"," <td>10040.0</td>\n"," <td>3310.0</td>\n"," <td>1068.0</td>\n"," <td>276.0</td>\n"," <td>1620.0</td>\n"," <td>116.0</td>\n"," <td>86.0</td>\n"," <td>462.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," 
<td>66002</td>\n"," <td>2.0</td>\n"," <td>212.0</td>\n"," <td>112.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>199486.0</td>\n"," <td>1358536.0</td>\n"," <td>1952422.0</td>\n"," <td>452706.0</td>\n"," <td>25130.0</td>\n"," <td>520.0</td>\n"," <td>0.0</td>\n"," <td>1891670.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>77898.0</td>\n"," <td>110548.0</td>\n"," <td>3605894.0</td>\n"," <td>3291610.0</td>\n"," <td>959756.0</td>\n"," <td>286536.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>106.0</td>\n"," <td>340.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>24286.0</td>\n"," <td>681260.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>3613906.0</td>\n"," <td>4218.0</td>\n"," <td>692.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>25278.0</td>\n"," <td>9438.0</td>\n"," <td>2504.0</td>\n"," <td>10262714.0</td>\n"," <td>1278664.0</td>\n"," <td>109700.0</td>\n"," <td>19072.0</td>\n"," <td>9520.0</td>\n"," <td>4902.0</td>\n"," <td>4434614.0</td>\n"," <td>70900.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>26002880.0</td>\n"," <td>2057.84</td>\n"," <td>2158.0</td>\n"," <td>396312.0</td>\n"," <td>538136.0</td>\n"," <td>495076.0</td>\n"," <td>380368.0</td>\n"," <td>440134.0</td>\n"," <td>269556.0</td>\n"," <td>1315022.0</td>\n"," <td>153680.0</td>\n"," <td>516.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>59816</td>\n"," <td>NaN</td>\n"," <td>1010.0</td>\n"," <td>936.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>123922.0</td>\n"," <td>984314.0</td>\n"," <td>1680050.0</td>\n"," <td>1135268.0</td>\n"," <td>92606.0</td>\n"," 
<td>14038.0</td>\n"," <td>1772828.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1116.0</td>\n"," <td>2372.0</td>\n"," <td>3546760.0</td>\n"," <td>3053176.0</td>\n"," <td>652616.0</td>\n"," <td>423374.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>7274.0</td>\n"," <td>0.0</td>\n"," <td>1622.0</td>\n"," <td>432.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>6388.0</td>\n"," <td>1091104.0</td>\n"," <td>...</td>\n"," <td>46.0</td>\n"," <td>3413978.0</td>\n"," <td>2924.0</td>\n"," <td>414.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>60.0</td>\n"," <td>38710.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>27740.0</td>\n"," <td>33354.0</td>\n"," <td>6330.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>133542.0</td>\n"," <td>21290.0</td>\n"," <td>2718360.0</td>\n"," <td>435370.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1179900.0</td>\n"," <td>1541.32</td>\n"," <td>1678.0</td>\n"," <td>659550.0</td>\n"," <td>691580.0</td>\n"," <td>540820.0</td>\n"," <td>243270.0</td>\n"," <td>483302.0</td>\n"," <td>485332.0</td>\n"," <td>431376.0</td>\n"," <td>210074.0</td>\n"," <td>281662.0</td>\n"," <td>3232.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>1814</td>\n"," <td>NaN</td>\n"," <td>156.0</td>\n"," <td>140.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>72.0</td>\n"," <td>17926.0</td>\n"," <td>82834.0</td>\n"," <td>3114.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>48978.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>97146.0</td>\n"," <td>89920.0</td>\n"," <td>12932.0</td>\n"," <td>5092.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>102.0</td>\n"," 
<td>50.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>11544.0</td>\n"," <td>73570.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>95372.0</td>\n"," <td>78.0</td>\n"," <td>36.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>538.0</td>\n"," <td>360.0</td>\n"," <td>142.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1822.0</td>\n"," <td>344.0</td>\n"," <td>2140.0</td>\n"," <td>394.0</td>\n"," <td>13664.0</td>\n"," <td>110.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>813740.0</td>\n"," <td>113.86</td>\n"," <td>52.0</td>\n"," <td>10216.0</td>\n"," <td>9958.0</td>\n"," <td>7646.0</td>\n"," <td>4144.0</td>\n"," <td>18466.0</td>\n"," <td>49782.0</td>\n"," <td>3176.0</td>\n"," <td>482.0</td>\n"," <td>76.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ... ee_008 ee_009 ef_000 eg_000\n","0 neg 60 0.0 20.0 ... 0.0 0.0 0.0 0.0\n","1 neg 82 0.0 68.0 ... 0.0 0.0 0.0 0.0\n","2 neg 66002 2.0 212.0 ... 516.0 0.0 0.0 0.0\n","3 neg 59816 NaN 1010.0 ... 281662.0 3232.0 0.0 0.0\n","4 neg 1814 NaN 156.0 ... 
76.0 0.0 0.0 0.0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"id":"1qGBAgY1H4OL","colab_type":"code","outputId":"f77d2266-dab7-40b4-f745-631151982530","executionInfo":{"status":"ok","timestamp":1569337761267,"user_tz":240,"elapsed":47023,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":102}},"source":["test.info()"],"execution_count":13,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 16000 entries, 0 to 15999\n","Columns: 171 entries, class to eg_000\n","dtypes: float64(169), int64(1), object(1)\n","memory usage: 20.9+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"UZ2cITzvIKpb","colab_type":"code","colab":{}},"source":["# Feature columns removed from the test frame before rows with NaN are dropped.\n","DROPPED_COLS = ['ab_000', 'bm_000', 'bn_000', 'bo_000', 'bp_000', 'bq_000', 'br_000',\n","                'cr_000']\n","\n","# Rebind instead of using inplace=True, and ignore columns that are already gone,\n","# so re-running this cell is a no-op rather than a KeyError.\n","test = test.drop(columns=DROPPED_COLS, errors='ignore')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"7wRkce-rH7M_","colab_type":"code","outputId":"40a8493b-1b21-4e79-c30f-046791b5e52f","executionInfo":{"status":"ok","timestamp":1569337761269,"user_tz":240,"elapsed":47015,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":102}},"source":["test.dropna().info()"],"execution_count":15,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 5620 entries, 2 to 15997\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 7.0+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"_pS9tSR6Lgum","colab_type":"code","outputId":"703fa561-efd2-4ebb-d4a3-2c7a8b09a754","executionInfo":{"status":"ok","timestamp":1569337761482,"user_tz":240,"elapsed":47220,"user":{"displayName":"Srikanth
Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":102}},"source":["train1 = train.dropna()\n","\n","train1.info()"],"execution_count":16,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 20915 entries, 0 to 59999\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 26.2+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"c0hNvW7kLZcX","colab_type":"code","outputId":"9fb92a1a-007f-4081-9def-26fa256457b6","executionInfo":{"status":"ok","timestamp":1569337761482,"user_tz":240,"elapsed":47215,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":102}},"source":["test1 = test.dropna()\n","\n","test1.info()"],"execution_count":17,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 5620 entries, 2 to 15997\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 7.0+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"XjmByTaweTG5","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3M8DVJ1_NlWe","colab_type":"code","colab":{}},"source":["X_train = train1.drop(['class'], axis=1)\n","\n","y_train = train1['class']\n","\n","X_test = test1.drop(['class'], axis=1)\n","\n","y_test = test1['class']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c8GPtuulQNZE","colab_type":"code","colab":{}},"source":["# Fit the min-max scaler on the training features only and reuse that fitted\n","# scaler for the test features. Fitting a second scaler on the test set would map
the two sets onto different scales and leak test statistics into preprocessing.\n","# Test values may therefore fall slightly outside [0, 1]; that is expected.\n","min_max_scaler = preprocessing.MinMaxScaler()\n","X_train = pd.DataFrame(min_max_scaler.fit_transform(X_train.values))\n","X_test = pd.DataFrame(min_max_scaler.transform(X_test.values))"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"oSo0OblqOfrZ","colab_type":"code","colab":{}},"source":["y_train = pd.get_dummies(y_train)\n","\n","y_test = pd.get_dummies(y_test)\n"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"DiITZ1mgCjgf","colab_type":"text"},"source":["Optimize : \n","\n","1. Number of Epochs\n","2. Number of Layers\n","3. Number of Nodes in each layer\n","4. Learning rate"]},{"cell_type":"code","metadata":{"id":"sUb1K7hc9_HK","colab_type":"code","outputId":"66368284-ed77-4ebb-e126-79c281bb8bdf","executionInfo":{"status":"ok","timestamp":1569341911952,"user_tz":240,"elapsed":22667,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":697}},"source":["from keras import optimizers, metrics, losses\n","model = models.Sequential()\n","model.add(layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))\n","model.add(layers.Dense(64, activation='relu'))\n","model.add(layers.Dense(2, activation='softmax'))\n","model.compile(optimizer=optimizers.RMSprop(lr=0.001),\n"," loss=losses.binary_crossentropy,\n"," metrics=[metrics.binary_accuracy])\n","history = model.fit(X_train,y_train,epochs=20,batch_size=32)"],"execution_count":161,"outputs":[{"output_type":"stream","text":["Epoch 1/20\n","20915/20915 [==============================] - 2s 87us/step - loss: 0.0473 - binary_accuracy: 0.9923\n","Epoch 2/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0240 - binary_accuracy: 0.9945\n","Epoch 3/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0225 - binary_accuracy: 0.9954\n","Epoch 4/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0225 - binary_accuracy: 0.9954\n","Epoch 5/20\n","20915/20915 [==============================] - 1s 50us/step - loss:
0.0216 - binary_accuracy: 0.9957\n","Epoch 6/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0217 - binary_accuracy: 0.9960\n","Epoch 7/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0220 - binary_accuracy: 0.9958\n","Epoch 8/20\n","20915/20915 [==============================] - 1s 53us/step - loss: 0.0216 - binary_accuracy: 0.9956\n","Epoch 9/20\n","20915/20915 [==============================] - 1s 52us/step - loss: 0.0219 - binary_accuracy: 0.9961\n","Epoch 10/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0215 - binary_accuracy: 0.9964\n","Epoch 11/20\n","20915/20915 [==============================] - 1s 49us/step - loss: 0.0223 - binary_accuracy: 0.9961\n","Epoch 12/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0212 - binary_accuracy: 0.9963\n","Epoch 13/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0212 - binary_accuracy: 0.9964\n","Epoch 14/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0210 - binary_accuracy: 0.9965\n","Epoch 15/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0205 - binary_accuracy: 0.9963\n","Epoch 16/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0209 - binary_accuracy: 0.9964\n","Epoch 17/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0211 - binary_accuracy: 0.9967\n","Epoch 18/20\n","20915/20915 [==============================] - 1s 53us/step - loss: 0.0201 - binary_accuracy: 0.9966\n","Epoch 19/20\n","20915/20915 [==============================] - 1s 53us/step - loss: 0.0198 - binary_accuracy: 0.9965\n","Epoch 20/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0215 - binary_accuracy: 0.9968\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"0YQYK64i02SL","colab_type":"code","colab":{}},"source":["y_pred = 
model.predict(X_test)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"frF7ouT21EHQ","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"af45ee0f-8d6f-4882-c93d-b58cd3b9832c","executionInfo":{"status":"ok","timestamp":1569341916173,"user_tz":240,"elapsed":410,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["# Store the hard class labels under a new name so y_pred keeps the predicted\n","# probabilities (needed for ROC AUC) and this cell can be re-run safely.\n","y_pred_labels = np.argmax(y_pred, axis=1)\n","\n","y_pred_labels"],"execution_count":163,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0, 0, 0, ..., 1, 0, 0])"]},"metadata":{"tags":[]},"execution_count":163}]},{"cell_type":"code","metadata":{"id":"6SRL1wuo5sKP","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"1e8a1209-93de-4bfe-db8c-f0d3397ea28f","executionInfo":{"status":"ok","timestamp":1569341923114,"user_tz":240,"elapsed":422,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["# Collapse the one-hot targets to class indices without overwriting y_test,\n","# so re-running this cell does not fail on an already-flattened array.\n","y_test_labels = np.argmax(np.array(y_test), axis=1)\n","\n","y_test_labels\n","\n"],"execution_count":165,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([0, 0, 0, ..., 1, 0, 0])"]},"metadata":{"tags":[]},"execution_count":165}]},{"cell_type":"code","metadata":{"id":"uduhdNw59_K6","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":51},"outputId":"44e73948-cd6a-4362-b22b-231b82f56f00","executionInfo":{"status":"ok","timestamp":1569341926054,"user_tz":240,"elapsed":417,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["from sklearn.metrics import confusion_matrix\n","\n","# Rows are true classes, columns are predicted classes.\n","confusion_matrix(y_test_labels, y_pred_labels)"],"execution_count":166,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array([[5557, 1],\n"," [ 28,
34]])"]},"metadata":{"tags":[]},"execution_count":166}]},{"cell_type":"code","metadata":{"id":"qW3UMdsB_2BM","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":34},"outputId":"85f2286b-a233-487c-fe17-13f987ea211f","executionInfo":{"status":"ok","timestamp":1569341930473,"user_tz":240,"elapsed":410,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["from sklearn.metrics import roc_auc_score, precision_score, f1_score\n","\n","# ROC AUC ranks examples by score, so pass the predicted probability of class\n","# index 1 (the second softmax output) instead of thresholded 0/1 labels, which\n","# would reduce the ROC curve to a single operating point.\n","# Output cleared: re-run the notebook to obtain the probability-based value.\n","roc_auc_score(y_test_labels, y_pred[:, 1])"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"5f5BmtuHBZ71","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}