Python Programming - A+ Work Required. Only Experts
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"scania_failures.ipynb","provenance":[],"collapsed_sections":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"id":"t4NcxAzAzxUf","colab_type":"code","outputId":"cd9d8159-9e81-4e5b-e6c6-dcdec6635914","executionInfo":{"status":"ok","timestamp":1568979529260,"user_tz":240,"elapsed":24920,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":150}},"source":["# Mount Google Drive (this will prompt for authorization)\n","import os\n","\n","from google.colab import drive\n","\n","drive.mount('/content/drive')\n","\n","# Switch the working directory to the Drive folder and echo it back\n","os.chdir('/content/drive/My Drive/CNT 4149')\n","os.getcwd()"],"execution_count":1,"outputs":[{"output_type":"stream","text":["Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n","\n","Enter your authorization code:\n","··········\n","Mounted at /content/drive\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/plain":["'/content/drive/My Drive/CNT 4149'"]},"metadata":{"tags":[]},"execution_count":1}]},{"cell_type":"markdown","metadata":{"id":"PusD5tpk1435","colab_type":"text"},"source":["# HW 2\n","\n","\n","1. Train a neural network to predict the class variable \n","2. Display the learning curve (Accuracy vs num of epochs) for 4 values of learning rate (0.0001, 0.001, 0.01 and 0.1)\n","3. Test your model on the test set and report the accuracy\n","4. 
Display the confusion matrix\n","5. Further optimize the model by changing the number of nodes and layers to obtain the best true positive rate"]},{"cell_type":"code","metadata":{"id":"hEcxWjHaz-V4","colab_type":"code","colab":{}},"source":["import matplotlib.pyplot as plt\n","import numpy as np\n","import pandas as pd\n","from keras import layers, models\n","from sklearn import preprocessing\n","from sklearn.metrics import confusion_matrix\n","\n","%matplotlib inline"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"AdbXvmqH0KVR","colab_type":"code","outputId":"0117eae6-2576-447d-a9f8-beee263f5c1c","executionInfo":{"status":"ok","timestamp":1568979539326,"user_tz":240,"elapsed":5216,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":255}},"source":["# Load the training set, skipping the first 20 lines of the file\n","train = pd.read_csv(\"aps_failure_training_set.csv\", skiprows=20)\n","\n","train.head()"],"execution_count":3,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," <th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," 
<th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," <th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>76698</td>\n"," <td>na</td>\n"," <td>2130706438</td>\n"," <td>280</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>37250</td>\n"," <td>1432864</td>\n"," <td>3664156</td>\n"," <td>1007684</td>\n"," <td>25896</td>\n"," <td>0</td>\n"," <td>2551696</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>4933296</td>\n"," <td>3655166</td>\n"," <td>1766008</td>\n"," <td>1132040</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1012</td>\n"," <td>268</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>469014</td>\n"," <td>4239660</td>\n"," <td>...</td>\n"," <td>18</td>\n"," 
<td>5330690</td>\n"," <td>4732</td>\n"," <td>1126</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>62282</td>\n"," <td>85908</td>\n"," <td>32790</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>202710</td>\n"," <td>37928</td>\n"," <td>14745580</td>\n"," <td>1876644</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2801180</td>\n"," <td>2445.8</td>\n"," <td>2712</td>\n"," <td>965866</td>\n"," <td>1706908</td>\n"," <td>1240520</td>\n"," <td>493384</td>\n"," <td>721044</td>\n"," <td>469792</td>\n"," <td>339156</td>\n"," <td>157956</td>\n"," <td>73224</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>33058</td>\n"," <td>na</td>\n"," <td>0</td>\n"," <td>na</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>18254</td>\n"," <td>653294</td>\n"," <td>1720800</td>\n"," <td>516724</td>\n"," <td>31642</td>\n"," <td>0</td>\n"," <td>1393352</td>\n"," <td>0</td>\n"," <td>68</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2560898</td>\n"," <td>2127150</td>\n"," <td>1084598</td>\n"," <td>338544</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>71510</td>\n"," <td>772720</td>\n"," <td>...</td>\n"," <td>na</td>\n"," <td>na</td>\n"," <td>3312</td>\n"," <td>522</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>33736</td>\n"," <td>36946</td>\n"," <td>5936</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>103330</td>\n"," <td>16254</td>\n"," <td>4510080</td>\n"," <td>868538</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3477820</td>\n"," <td>2211.76</td>\n"," <td>2334</td>\n"," 
<td>664504</td>\n"," <td>824154</td>\n"," <td>421400</td>\n"," <td>178064</td>\n"," <td>293306</td>\n"," <td>245416</td>\n"," <td>133654</td>\n"," <td>81140</td>\n"," <td>97576</td>\n"," <td>1500</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," <td>41040</td>\n"," <td>na</td>\n"," <td>228</td>\n"," <td>100</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1648</td>\n"," <td>370592</td>\n"," <td>1883374</td>\n"," <td>292936</td>\n"," <td>12016</td>\n"," <td>0</td>\n"," <td>1234132</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2371990</td>\n"," <td>2173634</td>\n"," <td>300796</td>\n"," <td>153698</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>358</td>\n"," <td>110</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>870456</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>2341048</td>\n"," <td>1494</td>\n"," <td>152</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>13876</td>\n"," <td>38182</td>\n"," <td>8138</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>65772</td>\n"," <td>10534</td>\n"," <td>300240</td>\n"," <td>48028</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1040120</td>\n"," <td>1018.64</td>\n"," <td>1020</td>\n"," <td>262032</td>\n"," <td>453378</td>\n"," <td>277378</td>\n"," <td>159812</td>\n"," <td>423992</td>\n"," <td>409564</td>\n"," <td>320746</td>\n"," <td>158022</td>\n"," <td>95128</td>\n"," <td>514</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>12</td>\n"," <td>0</td>\n"," <td>70</td>\n"," <td>66</td>\n"," <td>0</td>\n"," <td>10</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>318</td>\n"," <td>2212</td>\n"," 
<td>3232</td>\n"," <td>1872</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2668</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>642</td>\n"," <td>3894</td>\n"," <td>10184</td>\n"," <td>7554</td>\n"," <td>10764</td>\n"," <td>1014</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>60</td>\n"," <td>6</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>2578</td>\n"," <td>76</td>\n"," <td>62</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>232</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2014</td>\n"," <td>370</td>\n"," <td>48</td>\n"," <td>18</td>\n"," <td>15740</td>\n"," <td>1822</td>\n"," <td>20174</td>\n"," <td>44</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1.08</td>\n"," <td>54</td>\n"," <td>5670</td>\n"," <td>1566</td>\n"," <td>240</td>\n"," <td>46</td>\n"," <td>58</td>\n"," <td>44</td>\n"," <td>10</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>4</td>\n"," <td>32</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>60874</td>\n"," <td>na</td>\n"," <td>1368</td>\n"," <td>458</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>43752</td>\n"," <td>1966618</td>\n"," <td>1800340</td>\n"," <td>131646</td>\n"," <td>4588</td>\n"," <td>0</td>\n"," <td>1974038</td>\n"," <td>0</td>\n"," <td>226</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3230626</td>\n"," <td>2618878</td>\n"," <td>1058136</td>\n"," <td>551022</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1788</td>\n"," <td>642</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>42124</td>\n"," <td>372236</td>\n"," <td>2128914</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>3590004</td>\n"," 
<td>2026</td>\n"," <td>444</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>44946</td>\n"," <td>62648</td>\n"," <td>11506</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>149474</td>\n"," <td>35154</td>\n"," <td>457040</td>\n"," <td>80482</td>\n"," <td>98334</td>\n"," <td>27588</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>21173050</td>\n"," <td>1116.06</td>\n"," <td>1176</td>\n"," <td>404740</td>\n"," <td>904230</td>\n"," <td>622012</td>\n"," <td>229790</td>\n"," <td>405298</td>\n"," <td>347188</td>\n"," <td>286954</td>\n"," <td>311560</td>\n"," <td>433954</td>\n"," <td>1218</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ad_000 ... ee_007 ee_008 ee_009 ef_000 eg_000\n","0 neg 76698 na 2130706438 280 ... 157956 73224 0 0 0\n","1 neg 33058 na 0 na ... 81140 97576 1500 0 0\n","2 neg 41040 na 228 100 ... 158022 95128 514 0 0\n","3 neg 12 0 70 66 ... 0 0 0 4 32\n","4 neg 60874 na 1368 458 ... 
311560 433954 1218 0 0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"id":"1hlh1rIP73GE","colab_type":"code","outputId":"974bc0bf-0d81-4c67-e3b3-f933c7fdb4b8","executionInfo":{"status":"ok","timestamp":1568979543359,"user_tz":240,"elapsed":426,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":111}},"source":["train.info()"],"execution_count":4,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 60000 entries, 0 to 59999\n","Columns: 171 entries, class to eg_000\n","dtypes: int64(1), object(170)\n","memory usage: 78.3+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"66rNHH8c8HOs","colab_type":"code","outputId":"b1a3b01a-74d9-4208-91e4-2e1e7c7048cf","executionInfo":{"status":"ok","timestamp":1568979561140,"user_tz":240,"elapsed":16012,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":255}},"source":["# Convert every column except 'class' to a numeric dtype; entries that\n","# cannot be parsed as numbers (such as the 'na' strings) become NaN.\n","feature_cols = train.columns.drop('class')\n","for name in feature_cols:\n","    train[name] = pd.to_numeric(train[name], errors='coerce')\n","\n","train.head()"],"execution_count":5,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," 
<th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," <th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>76698</td>\n"," <td>NaN</td>\n"," <td>2.130706e+09</td>\n"," <td>280.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>37250.0</td>\n"," <td>1432864.0</td>\n"," <td>3664156.0</td>\n"," <td>1007684.0</td>\n"," <td>25896.0</td>\n"," <td>0.0</td>\n"," <td>2551696.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>4933296.0</td>\n"," <td>3655166.0</td>\n"," 
<td>1766008.0</td>\n"," <td>1132040.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1012.0</td>\n"," <td>268.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>469014.0</td>\n"," <td>4239660.0</td>\n"," <td>...</td>\n"," <td>18.0</td>\n"," <td>5330690.0</td>\n"," <td>4732.0</td>\n"," <td>1126.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>62282.0</td>\n"," <td>85908.0</td>\n"," <td>32790.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>202710.0</td>\n"," <td>37928.0</td>\n"," <td>14745580.0</td>\n"," <td>1876644.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2801180.0</td>\n"," <td>2445.80</td>\n"," <td>2712.0</td>\n"," <td>965866.0</td>\n"," <td>1706908.0</td>\n"," <td>1240520.0</td>\n"," <td>493384.0</td>\n"," <td>721044.0</td>\n"," <td>469792.0</td>\n"," <td>339156.0</td>\n"," <td>157956.0</td>\n"," <td>73224.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>33058</td>\n"," <td>NaN</td>\n"," <td>0.000000e+00</td>\n"," <td>NaN</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>18254.0</td>\n"," <td>653294.0</td>\n"," <td>1720800.0</td>\n"," <td>516724.0</td>\n"," <td>31642.0</td>\n"," <td>0.0</td>\n"," <td>1393352.0</td>\n"," <td>0.0</td>\n"," <td>68.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2560898.0</td>\n"," <td>2127150.0</td>\n"," <td>1084598.0</td>\n"," <td>338544.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>71510.0</td>\n"," <td>772720.0</td>\n"," <td>...</td>\n"," 
<td>NaN</td>\n"," <td>NaN</td>\n"," <td>3312.0</td>\n"," <td>522.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>33736.0</td>\n"," <td>36946.0</td>\n"," <td>5936.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>103330.0</td>\n"," <td>16254.0</td>\n"," <td>4510080.0</td>\n"," <td>868538.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3477820.0</td>\n"," <td>2211.76</td>\n"," <td>2334.0</td>\n"," <td>664504.0</td>\n"," <td>824154.0</td>\n"," <td>421400.0</td>\n"," <td>178064.0</td>\n"," <td>293306.0</td>\n"," <td>245416.0</td>\n"," <td>133654.0</td>\n"," <td>81140.0</td>\n"," <td>97576.0</td>\n"," <td>1500.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," <td>41040</td>\n"," <td>NaN</td>\n"," <td>2.280000e+02</td>\n"," <td>100.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1648.0</td>\n"," <td>370592.0</td>\n"," <td>1883374.0</td>\n"," <td>292936.0</td>\n"," <td>12016.0</td>\n"," <td>0.0</td>\n"," <td>1234132.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2371990.0</td>\n"," <td>2173634.0</td>\n"," <td>300796.0</td>\n"," <td>153698.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>358.0</td>\n"," <td>110.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>870456.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>2341048.0</td>\n"," <td>1494.0</td>\n"," <td>152.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>13876.0</td>\n"," <td>38182.0</td>\n"," <td>8138.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," 
<td>65772.0</td>\n"," <td>10534.0</td>\n"," <td>300240.0</td>\n"," <td>48028.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1040120.0</td>\n"," <td>1018.64</td>\n"," <td>1020.0</td>\n"," <td>262032.0</td>\n"," <td>453378.0</td>\n"," <td>277378.0</td>\n"," <td>159812.0</td>\n"," <td>423992.0</td>\n"," <td>409564.0</td>\n"," <td>320746.0</td>\n"," <td>158022.0</td>\n"," <td>95128.0</td>\n"," <td>514.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>12</td>\n"," <td>0.0</td>\n"," <td>7.000000e+01</td>\n"," <td>66.0</td>\n"," <td>0.0</td>\n"," <td>10.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>318.0</td>\n"," <td>2212.0</td>\n"," <td>3232.0</td>\n"," <td>1872.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2668.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>642.0</td>\n"," <td>3894.0</td>\n"," <td>10184.0</td>\n"," <td>7554.0</td>\n"," <td>10764.0</td>\n"," <td>1014.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>60.0</td>\n"," <td>6.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>2578.0</td>\n"," <td>76.0</td>\n"," <td>62.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>232.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2014.0</td>\n"," <td>370.0</td>\n"," <td>48.0</td>\n"," <td>18.0</td>\n"," <td>15740.0</td>\n"," <td>1822.0</td>\n"," <td>20174.0</td>\n"," <td>44.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1.08</td>\n"," <td>54.0</td>\n"," <td>5670.0</td>\n"," <td>1566.0</td>\n"," <td>240.0</td>\n"," <td>46.0</td>\n"," <td>58.0</td>\n"," <td>44.0</td>\n"," <td>10.0</td>\n"," <td>0.0</td>\n"," 
<td>0.0</td>\n"," <td>0.0</td>\n"," <td>4.0</td>\n"," <td>32.0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>60874</td>\n"," <td>NaN</td>\n"," <td>1.368000e+03</td>\n"," <td>458.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>43752.0</td>\n"," <td>1966618.0</td>\n"," <td>1800340.0</td>\n"," <td>131646.0</td>\n"," <td>4588.0</td>\n"," <td>0.0</td>\n"," <td>1974038.0</td>\n"," <td>0.0</td>\n"," <td>226.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3230626.0</td>\n"," <td>2618878.0</td>\n"," <td>1058136.0</td>\n"," <td>551022.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1788.0</td>\n"," <td>642.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>42124.0</td>\n"," <td>372236.0</td>\n"," <td>2128914.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>3590004.0</td>\n"," <td>2026.0</td>\n"," <td>444.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>44946.0</td>\n"," <td>62648.0</td>\n"," <td>11506.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>149474.0</td>\n"," <td>35154.0</td>\n"," <td>457040.0</td>\n"," <td>80482.0</td>\n"," <td>98334.0</td>\n"," <td>27588.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>21173050.0</td>\n"," <td>1116.06</td>\n"," <td>1176.0</td>\n"," <td>404740.0</td>\n"," <td>904230.0</td>\n"," <td>622012.0</td>\n"," <td>229790.0</td>\n"," <td>405298.0</td>\n"," <td>347188.0</td>\n"," <td>286954.0</td>\n"," <td>311560.0</td>\n"," <td>433954.0</td>\n"," <td>1218.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ... ee_008 ee_009 ef_000 eg_000\n","0 neg 76698 NaN 2.130706e+09 ... 
73224.0 0.0 0.0 0.0\n","1 neg 33058 NaN 0.000000e+00 ... 97576.0 1500.0 0.0 0.0\n","2 neg 41040 NaN 2.280000e+02 ... 95128.0 514.0 0.0 0.0\n","3 neg 12 0.0 7.000000e+01 ... 0.0 0.0 4.0 32.0\n","4 neg 60874 NaN 1.368000e+03 ... 433954.0 1218.0 0.0 0.0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"id":"q7i24-Vb-C1p","colab_type":"code","outputId":"6f7932b6-ae7d-4dba-ec7a-451077e926c4","executionInfo":{"status":"ok","timestamp":1568979566757,"user_tz":240,"elapsed":1520,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":73}},"source":["# Flag the columns in which more than 30000 values are missing\n","index = train.isna().sum() > 30000\n","\n","train.columns[index]"],"execution_count":6,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['ab_000', 'bm_000', 'bn_000', 'bo_000', 'bp_000', 'bq_000', 'br_000',\n"," 'cr_000'],\n"," dtype='object')"]},"metadata":{"tags":[]},"execution_count":6}]},{"cell_type":"code","metadata":{"id":"WE_T70K4Fn57","colab_type":"code","colab":{}},"source":["# Drop the columns with more than 30000 missing values. The mask is recomputed\n","# here (instead of hard-coding the column names) so that re-running this cell\n","# simply drops nothing rather than raising a KeyError.\n","mostly_missing = train.columns[train.isna().sum() > 30000]\n","train = train.drop(columns=mostly_missing)\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"yqYul8j4HGC9","colab_type":"code","outputId":"54f89e27-8b87-4331-8ef4-e12562f7460a","executionInfo":{"status":"ok","timestamp":1568979575840,"user_tz":240,"elapsed":616,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":111}},"source":["train.dropna().info()"],"execution_count":8,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 20915 entries, 0 to 59999\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 26.2+ 
MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"MoyKKysAIV17","colab_type":"code","outputId":"d78919e9-6a55-430d-cc1a-b2949e12f76a","executionInfo":{"status":"ok","timestamp":1568907401756,"user_tz":240,"elapsed":608,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["# Number of 'pos' rows left if every row with a missing value is dropped\n","int((train.dropna()['class'] == 'pos').sum())"],"execution_count":0,"outputs":[{"output_type":"execute_result","data":{"text/plain":["183"]},"metadata":{"tags":[]},"execution_count":34}]},{"cell_type":"code","metadata":{"id":"l4-kcTSjKoLu","colab_type":"code","colab":{}},"source":["# Number of 'pos' rows in the full training set\n","int((train['class'] == 'pos').sum())"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zcNQVR4S0QpZ","colab_type":"code","outputId":"0d409300-f35f-4712-8de5-79933dd4efef","executionInfo":{"status":"ok","timestamp":1568979588585,"user_tz":240,"elapsed":2465,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":255}},"source":["# Load the test set, skipping the first 20 lines of the file\n","test = pd.read_csv(\"aps_failure_test_set.csv\", skiprows=20)\n","\n","test.head()"],"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," <th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," 
<th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," <th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>60</td>\n"," <td>0</td>\n"," <td>20</td>\n"," <td>12</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>2682</td>\n"," <td>4736</td>\n"," <td>3862</td>\n"," <td>1846</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3976</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1520</td>\n"," <td>2374</td>\n"," <td>11516</td>\n"," <td>9480</td>\n"," <td>111258</td>\n"," <td>470</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>58</td>\n"," <td>26</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>6598</td>\n"," <td>70</td>\n"," <td>112</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>340</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1100</td>\n"," <td>574</td>\n"," <td>232</td>\n"," <td>66</td>\n"," <td>780</td>\n"," <td>882</td>\n"," <td>0</td>\n"," <td>4</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>465.5</td>\n"," <td>90</td>\n"," <td>7502</td>\n"," <td>3156</td>\n"," <td>1098</td>\n"," <td>138</td>\n"," <td>412</td>\n"," <td>654</td>\n"," <td>78</td>\n"," <td>88</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>82</td>\n"," <td>0</td>\n"," <td>68</td>\n"," <td>40</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>748</td>\n"," <td>12594</td>\n"," <td>3636</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>5244</td>\n"," <td>0</td>\n"," <td>60</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>23174</td>\n"," <td>18166</td>\n"," <td>23686</td>\n"," <td>1270</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>12</td>\n"," <td>82</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>7918</td>\n"," <td>78</td>\n"," <td>40</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>352</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>3996</td>\n"," <td>584</td>\n"," <td>200</td>\n"," <td>62</td>\n"," <td>37580</td>\n"," <td>3756</td>\n"," <td>6368</td>\n"," <td>36</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>2.86</td>\n"," <td>102</td>\n"," <td>10040</td>\n"," <td>3310</td>\n"," <td>1068</td>\n"," <td>276</td>\n"," <td>1620</td>\n"," <td>116</td>\n"," <td>86</td>\n"," <td>462</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," <td>66002</td>\n"," <td>2</td>\n"," <td>212</td>\n"," <td>112</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>199486</td>\n"," <td>1358536</td>\n"," <td>1952422</td>\n"," <td>452706</td>\n"," <td>25130</td>\n"," <td>520</td>\n"," <td>0</td>\n"," <td>1891670</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>77898</td>\n"," <td>110548</td>\n"," <td>3605894</td>\n"," <td>3291610</td>\n"," <td>959756</td>\n"," <td>286536</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>106</td>\n"," <td>340</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>24286</td>\n"," <td>681260</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>3613906</td>\n"," <td>4218</td>\n"," <td>692</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>25278</td>\n"," <td>9438</td>\n"," <td>2504</td>\n"," <td>10262714</td>\n"," <td>1278664</td>\n"," <td>109700</td>\n"," <td>19072</td>\n"," <td>9520</td>\n"," <td>4902</td>\n"," <td>4434614</td>\n"," <td>70900</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>26002880</td>\n"," <td>2057.84</td>\n"," <td>2158</td>\n"," <td>396312</td>\n"," <td>538136</td>\n"," <td>495076</td>\n"," <td>380368</td>\n"," <td>440134</td>\n"," <td>269556</td>\n"," <td>1315022</td>\n"," <td>153680</td>\n"," <td>516</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>59816</td>\n"," <td>na</td>\n"," <td>1010</td>\n"," <td>936</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>0</td>\n"," <td>123922</td>\n"," <td>984314</td>\n"," <td>1680050</td>\n"," <td>1135268</td>\n"," <td>92606</td>\n"," <td>14038</td>\n"," <td>1772828</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1116</td>\n"," <td>2372</td>\n"," <td>3546760</td>\n"," <td>3053176</td>\n"," <td>652616</td>\n"," <td>423374</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>7274</td>\n"," <td>0</td>\n"," <td>1622</td>\n"," <td>432</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>6388</td>\n"," <td>1091104</td>\n"," <td>...</td>\n"," <td>46</td>\n"," <td>3413978</td>\n"," <td>2924</td>\n"," <td>414</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>60</td>\n"," <td>38710</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>27740</td>\n"," <td>33354</td>\n"," <td>6330</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>133542</td>\n"," <td>21290</td>\n"," <td>2718360</td>\n"," <td>435370</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1179900</td>\n"," <td>1541.32</td>\n"," <td>1678</td>\n"," <td>659550</td>\n"," <td>691580</td>\n"," <td>540820</td>\n"," <td>243270</td>\n"," <td>483302</td>\n"," <td>485332</td>\n"," <td>431376</td>\n"," <td>210074</td>\n"," <td>281662</td>\n"," <td>3232</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>1814</td>\n"," <td>na</td>\n"," <td>156</td>\n"," <td>140</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>72</td>\n"," <td>17926</td>\n"," <td>82834</td>\n"," <td>3114</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>48978</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>97146</td>\n"," <td>89920</td>\n"," <td>12932</td>\n"," <td>5092</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>102</td>\n"," <td>50</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," 
<td>0</td>\n"," <td>0</td>\n"," <td>11544</td>\n"," <td>73570</td>\n"," <td>...</td>\n"," <td>0</td>\n"," <td>95372</td>\n"," <td>78</td>\n"," <td>36</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>538</td>\n"," <td>360</td>\n"," <td>142</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1822</td>\n"," <td>344</td>\n"," <td>2140</td>\n"," <td>394</td>\n"," <td>13664</td>\n"," <td>110</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>813740</td>\n"," <td>113.86</td>\n"," <td>52</td>\n"," <td>10216</td>\n"," <td>9958</td>\n"," <td>7646</td>\n"," <td>4144</td>\n"," <td>18466</td>\n"," <td>49782</td>\n"," <td>3176</td>\n"," <td>482</td>\n"," <td>76</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ad_000 ... ee_007 ee_008 ee_009 ef_000 eg_000\n","0 neg 60 0 20 12 ... 88 0 0 0 0\n","1 neg 82 0 68 40 ... 462 0 0 0 0\n","2 neg 66002 2 212 112 ... 153680 516 0 0 0\n","3 neg 59816 na 1010 936 ... 210074 281662 3232 0 0\n","4 neg 1814 na 156 140 ... 
482 76 0 0 0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"code","metadata":{"id":"pUo7iz2H1Dvg","colab_type":"code","outputId":"043f9a96-f185-42d3-e287-15c9e6fc683f","executionInfo":{"status":"ok","timestamp":1568979594055,"user_tz":240,"elapsed":4875,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":255}},"source":["for col in test.drop(['class'], axis=1).columns:\n"," \n"," test[col] = pd.to_numeric(test[col], errors= 'coerce')\n","\n","test.head()"],"execution_count":10,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>class</th>\n"," <th>aa_000</th>\n"," <th>ab_000</th>\n"," <th>ac_000</th>\n"," <th>ad_000</th>\n"," <th>ae_000</th>\n"," <th>af_000</th>\n"," <th>ag_000</th>\n"," <th>ag_001</th>\n"," <th>ag_002</th>\n"," <th>ag_003</th>\n"," <th>ag_004</th>\n"," <th>ag_005</th>\n"," <th>ag_006</th>\n"," <th>ag_007</th>\n"," <th>ag_008</th>\n"," <th>ag_009</th>\n"," <th>ah_000</th>\n"," <th>ai_000</th>\n"," <th>aj_000</th>\n"," <th>ak_000</th>\n"," <th>al_000</th>\n"," <th>am_0</th>\n"," <th>an_000</th>\n"," <th>ao_000</th>\n"," <th>ap_000</th>\n"," <th>aq_000</th>\n"," <th>ar_000</th>\n"," <th>as_000</th>\n"," <th>at_000</th>\n"," <th>au_000</th>\n"," <th>av_000</th>\n"," <th>ax_000</th>\n"," <th>ay_000</th>\n"," <th>ay_001</th>\n"," <th>ay_002</th>\n"," <th>ay_003</th>\n"," <th>ay_004</th>\n"," <th>ay_005</th>\n"," <th>ay_006</th>\n"," <th>...</th>\n"," <th>db_000</th>\n"," <th>dc_000</th>\n"," <th>dd_000</th>\n"," <th>de_000</th>\n"," 
<th>df_000</th>\n"," <th>dg_000</th>\n"," <th>dh_000</th>\n"," <th>di_000</th>\n"," <th>dj_000</th>\n"," <th>dk_000</th>\n"," <th>dl_000</th>\n"," <th>dm_000</th>\n"," <th>dn_000</th>\n"," <th>do_000</th>\n"," <th>dp_000</th>\n"," <th>dq_000</th>\n"," <th>dr_000</th>\n"," <th>ds_000</th>\n"," <th>dt_000</th>\n"," <th>du_000</th>\n"," <th>dv_000</th>\n"," <th>dx_000</th>\n"," <th>dy_000</th>\n"," <th>dz_000</th>\n"," <th>ea_000</th>\n"," <th>eb_000</th>\n"," <th>ec_00</th>\n"," <th>ed_000</th>\n"," <th>ee_000</th>\n"," <th>ee_001</th>\n"," <th>ee_002</th>\n"," <th>ee_003</th>\n"," <th>ee_004</th>\n"," <th>ee_005</th>\n"," <th>ee_006</th>\n"," <th>ee_007</th>\n"," <th>ee_008</th>\n"," <th>ee_009</th>\n"," <th>ef_000</th>\n"," <th>eg_000</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>neg</td>\n"," <td>60</td>\n"," <td>0.0</td>\n"," <td>20.0</td>\n"," <td>12.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2682.0</td>\n"," <td>4736.0</td>\n"," <td>3862.0</td>\n"," <td>1846.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3976.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1520.0</td>\n"," <td>2374.0</td>\n"," <td>11516.0</td>\n"," <td>9480.0</td>\n"," <td>111258.0</td>\n"," <td>470.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>58.0</td>\n"," <td>26.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>6598.0</td>\n"," <td>70.0</td>\n"," <td>112.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>340.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1100.0</td>\n"," <td>574.0</td>\n"," <td>232.0</td>\n"," <td>66.0</td>\n"," <td>780.0</td>\n"," <td>882.0</td>\n"," 
<td>0.0</td>\n"," <td>4.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>465.50</td>\n"," <td>90.0</td>\n"," <td>7502.0</td>\n"," <td>3156.0</td>\n"," <td>1098.0</td>\n"," <td>138.0</td>\n"," <td>412.0</td>\n"," <td>654.0</td>\n"," <td>78.0</td>\n"," <td>88.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>neg</td>\n"," <td>82</td>\n"," <td>0.0</td>\n"," <td>68.0</td>\n"," <td>40.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>748.0</td>\n"," <td>12594.0</td>\n"," <td>3636.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>5244.0</td>\n"," <td>0.0</td>\n"," <td>60.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>23174.0</td>\n"," <td>18166.0</td>\n"," <td>23686.0</td>\n"," <td>1270.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>12.0</td>\n"," <td>82.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>7918.0</td>\n"," <td>78.0</td>\n"," <td>40.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>352.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>3996.0</td>\n"," <td>584.0</td>\n"," <td>200.0</td>\n"," <td>62.0</td>\n"," <td>37580.0</td>\n"," <td>3756.0</td>\n"," <td>6368.0</td>\n"," <td>36.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>2.86</td>\n"," <td>102.0</td>\n"," <td>10040.0</td>\n"," <td>3310.0</td>\n"," <td>1068.0</td>\n"," <td>276.0</td>\n"," <td>1620.0</td>\n"," <td>116.0</td>\n"," <td>86.0</td>\n"," <td>462.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>neg</td>\n"," 
<td>66002</td>\n"," <td>2.0</td>\n"," <td>212.0</td>\n"," <td>112.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>199486.0</td>\n"," <td>1358536.0</td>\n"," <td>1952422.0</td>\n"," <td>452706.0</td>\n"," <td>25130.0</td>\n"," <td>520.0</td>\n"," <td>0.0</td>\n"," <td>1891670.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>77898.0</td>\n"," <td>110548.0</td>\n"," <td>3605894.0</td>\n"," <td>3291610.0</td>\n"," <td>959756.0</td>\n"," <td>286536.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>106.0</td>\n"," <td>340.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>24286.0</td>\n"," <td>681260.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>3613906.0</td>\n"," <td>4218.0</td>\n"," <td>692.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>25278.0</td>\n"," <td>9438.0</td>\n"," <td>2504.0</td>\n"," <td>10262714.0</td>\n"," <td>1278664.0</td>\n"," <td>109700.0</td>\n"," <td>19072.0</td>\n"," <td>9520.0</td>\n"," <td>4902.0</td>\n"," <td>4434614.0</td>\n"," <td>70900.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>26002880.0</td>\n"," <td>2057.84</td>\n"," <td>2158.0</td>\n"," <td>396312.0</td>\n"," <td>538136.0</td>\n"," <td>495076.0</td>\n"," <td>380368.0</td>\n"," <td>440134.0</td>\n"," <td>269556.0</td>\n"," <td>1315022.0</td>\n"," <td>153680.0</td>\n"," <td>516.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>neg</td>\n"," <td>59816</td>\n"," <td>NaN</td>\n"," <td>1010.0</td>\n"," <td>936.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>123922.0</td>\n"," <td>984314.0</td>\n"," <td>1680050.0</td>\n"," <td>1135268.0</td>\n"," <td>92606.0</td>\n"," 
<td>14038.0</td>\n"," <td>1772828.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1116.0</td>\n"," <td>2372.0</td>\n"," <td>3546760.0</td>\n"," <td>3053176.0</td>\n"," <td>652616.0</td>\n"," <td>423374.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>7274.0</td>\n"," <td>0.0</td>\n"," <td>1622.0</td>\n"," <td>432.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>6388.0</td>\n"," <td>1091104.0</td>\n"," <td>...</td>\n"," <td>46.0</td>\n"," <td>3413978.0</td>\n"," <td>2924.0</td>\n"," <td>414.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>60.0</td>\n"," <td>38710.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>27740.0</td>\n"," <td>33354.0</td>\n"," <td>6330.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>133542.0</td>\n"," <td>21290.0</td>\n"," <td>2718360.0</td>\n"," <td>435370.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1179900.0</td>\n"," <td>1541.32</td>\n"," <td>1678.0</td>\n"," <td>659550.0</td>\n"," <td>691580.0</td>\n"," <td>540820.0</td>\n"," <td>243270.0</td>\n"," <td>483302.0</td>\n"," <td>485332.0</td>\n"," <td>431376.0</td>\n"," <td>210074.0</td>\n"," <td>281662.0</td>\n"," <td>3232.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>neg</td>\n"," <td>1814</td>\n"," <td>NaN</td>\n"," <td>156.0</td>\n"," <td>140.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>72.0</td>\n"," <td>17926.0</td>\n"," <td>82834.0</td>\n"," <td>3114.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>48978.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>97146.0</td>\n"," <td>89920.0</td>\n"," <td>12932.0</td>\n"," <td>5092.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>102.0</td>\n"," 
<td>50.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>11544.0</td>\n"," <td>73570.0</td>\n"," <td>...</td>\n"," <td>0.0</td>\n"," <td>95372.0</td>\n"," <td>78.0</td>\n"," <td>36.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>538.0</td>\n"," <td>360.0</td>\n"," <td>142.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>1822.0</td>\n"," <td>344.0</td>\n"," <td>2140.0</td>\n"," <td>394.0</td>\n"," <td>13664.0</td>\n"," <td>110.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>813740.0</td>\n"," <td>113.86</td>\n"," <td>52.0</td>\n"," <td>10216.0</td>\n"," <td>9958.0</td>\n"," <td>7646.0</td>\n"," <td>4144.0</td>\n"," <td>18466.0</td>\n"," <td>49782.0</td>\n"," <td>3176.0</td>\n"," <td>482.0</td>\n"," <td>76.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," <td>0.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>5 rows × 171 columns</p>\n","</div>"],"text/plain":[" class aa_000 ab_000 ac_000 ... ee_008 ee_009 ef_000 eg_000\n","0 neg 60 0.0 20.0 ... 0.0 0.0 0.0 0.0\n","1 neg 82 0.0 68.0 ... 0.0 0.0 0.0 0.0\n","2 neg 66002 2.0 212.0 ... 516.0 0.0 0.0 0.0\n","3 neg 59816 NaN 1010.0 ... 281662.0 3232.0 0.0 0.0\n","4 neg 1814 NaN 156.0 ... 
76.0 0.0 0.0 0.0\n","\n","[5 rows x 171 columns]"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"id":"1qGBAgY1H4OL","colab_type":"code","outputId":"60ddb111-54e9-446b-bf80-04c2d94ee86f","executionInfo":{"status":"ok","timestamp":1568979598198,"user_tz":240,"elapsed":1599,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":111}},"source":["test.info()"],"execution_count":11,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","RangeIndex: 16000 entries, 0 to 15999\n","Columns: 171 entries, class to eg_000\n","dtypes: float64(169), int64(1), object(1)\n","memory usage: 20.9+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"UZ2cITzvIKpb","colab_type":"code","colab":{"base_uri":"https://localhost:8080/","height":348},"outputId":"1f8bdd66-07a1-4f22-a6f2-99469c9b7e3d","executionInfo":{"status":"error","timestamp":1568979601473,"user_tz":240,"elapsed":575,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}}},"source":["test.drop(['ab_000', 'bm_000', 'bn_000', 'bo_000', 'bp_000', 'bq_000', 'br_000',\n"," 'cr_000'], axis=1, inplace = True)"],"execution_count":13,"outputs":[{"output_type":"error","ename":"KeyError","evalue":"ignored","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)","\u001b[0;32m<ipython-input-13-ff96cfe9476f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m test.drop(['ab_000', 'bm_000', 'bn_000', 'bo_000', 'bp_000', 'bq_000', 'br_000',\n\u001b[0;32m----> 2\u001b[0;31m 'cr_000'], axis=1, inplace = True)\n\u001b[0m","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, 
errors)\u001b[0m\n\u001b[1;32m 3938\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3939\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minplace\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3940\u001b[0;31m errors=errors)\n\u001b[0m\u001b[1;32m 3941\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3942\u001b[0m @rewrite_axis_style_signature('mapper', [('copy', True),\n","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 3778\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3779\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3780\u001b[0;31m \u001b[0mobj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3781\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3782\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[0;34m(self, labels, axis, level, errors)\u001b[0m\n\u001b[1;32m 3810\u001b[0m \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3811\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3812\u001b[0;31m \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3813\u001b[0m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreindex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0maxis_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mnew_axis\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3814\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 4963\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m'ignore'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4964\u001b[0m raise KeyError(\n\u001b[0;32m-> 4965\u001b[0;31m '{} not found in axis'.format(labels[mask]))\n\u001b[0m\u001b[1;32m 4966\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4967\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mKeyError\u001b[0m: \"['ab_000' 'bm_000' 'bn_000' 'bo_000' 'bp_000' 'bq_000' 'br_000' 'cr_000'] not found in axis\""]}]},{"cell_type":"code","metadata":{"id":"7wRkce-rH7M_","colab_type":"code","outputId":"ddf36c56-abde-485a-b422-efc8224659cd","executionInfo":{"status":"ok","timestamp":1568979608451,"user_tz":240,"elapsed":369,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":111}},"source":["test.dropna().info()"],"execution_count":14,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 5620 entries, 2 to 15997\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 7.0+ 
MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"_pS9tSR6Lgum","colab_type":"code","outputId":"86a702f1-47ce-4a04-d329-2920bcd0d3f1","executionInfo":{"status":"ok","timestamp":1568979611869,"user_tz":240,"elapsed":599,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":111}},"source":["train1 = train.dropna()\n","\n","train1.info()"],"execution_count":15,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 20915 entries, 0 to 59999\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 26.2+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"c0hNvW7kLZcX","colab_type":"code","outputId":"d6fb9535-383a-41ee-d2a6-fbd8b2eeba75","executionInfo":{"status":"ok","timestamp":1568979614988,"user_tz":240,"elapsed":350,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":111}},"source":["test1 = test.dropna()\n","\n","test1.info()"],"execution_count":16,"outputs":[{"output_type":"stream","text":["<class 'pandas.core.frame.DataFrame'>\n","Int64Index: 5620 entries, 2 to 15997\n","Columns: 163 entries, class to eg_000\n","dtypes: float64(161), int64(1), object(1)\n","memory usage: 7.0+ MB\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"XjmByTaweTG5","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3M8DVJ1_NlWe","colab_type":"code","colab":{}},"source":["X_train = train1.drop(['class'], axis=1)\n","\n","y_train = train1['class']\n","\n","X_test = test1.drop(['class'], axis=1)\n","\n","y_test = test1['class']"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"c8GPtuulQNZE","colab_type":"code","colab":{}},"source":["\n","x = X_train.values #returns a numpy 
array\n","min_max_scaler = preprocessing.MinMaxScaler()\n","x_scaled = min_max_scaler.fit_transform(x)\n","X_train = pd.DataFrame(x_scaled)\n","\n","\n","x = X_test.values #returns a numpy array\n","min_max_scaler = preprocessing.MinMaxScaler()\n","x_scaled = min_max_scaler.fit_transform(x)\n","X_test = pd.DataFrame(x_scaled)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"oSo0OblqOfrZ","colab_type":"code","colab":{}},"source":["y_train = (pd.get_dummies(y_train)['pos'])\n","\n","y_test = (pd.get_dummies(y_test)['pos'])\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-0Y2r7h_g3I5","colab_type":"code","colab":{}},"source":["from keras.utils import to_categorical\n","y_train = to_categorical(y_train)\n","\n","y_test = to_categorical(y_test)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"uW2WICq93gGK","colab_type":"code","colab":{}},"source":["def build_model():\n"," # Because we will need to instantiate\n"," # the same model multiple times,\n"," # we use a function to construct it.\n"," model = models.Sequential()\n"," model.add(layers.Dense(64, activation='relu',\n"," input_shape=(X_train.shape[1],)))\n"," model.add(layers.Dense(64, activation='relu'))\n"," model.add(layers.Dense(2, activation='softmax'))\n"," model.compile(optimizer='rmsprop',\n"," loss='sparse_categorical_crossentropy',\n"," metrics=['accuracy'])\n"," return model"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"sUb1K7hc9_HK","colab_type":"code","outputId":"9165ca4a-64d1-4103-9b50-d7fc0a55d654","executionInfo":{"status":"ok","timestamp":1568980438418,"user_tz":240,"elapsed":22704,"user":{"displayName":"Srikanth Namuduri","photoUrl":"","userId":"14954669034887449909"}},"colab":{"base_uri":"https://localhost:8080/","height":881}},"source":["model = models.Sequential()\n","model.add(layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)))\n","model.add(layers.Dense(64, 
activation='relu'))\n","model.add(layers.Dense(2, activation='softmax'))\n","model.compile(optimizer='rmsprop',loss='categorical_crossentropy', metrics=['accuracy'])\n","history = model.fit(X_train,y_train,epochs=20,batch_size=32)"],"execution_count":36,"outputs":[{"output_type":"stream","text":["WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n","Instructions for updating:\n","Use tf.where in 2.0, which has the same broadcast rule as np.where\n","WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:1033: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.\n","\n","Epoch 1/20\n","20915/20915 [==============================] - 2s 88us/step - loss: 0.0502 - acc: 0.9922\n","Epoch 2/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0237 - acc: 0.9948\n","Epoch 3/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0211 - acc: 0.9952\n","Epoch 4/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0212 - acc: 0.9959\n","Epoch 5/20\n","20915/20915 [==============================] - 1s 48us/step - loss: 0.0216 - acc: 0.9961\n","Epoch 6/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0220 - acc: 0.9961\n","Epoch 7/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0223 - acc: 0.9959\n","Epoch 8/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0222 - acc: 0.9960\n","Epoch 9/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0209 - acc: 0.9961\n","Epoch 10/20\n","20915/20915 [==============================] - 1s 48us/step - loss: 0.0216 - acc: 0.9959\n","Epoch 11/20\n","20915/20915 [==============================] - 1s 53us/step - 
loss: 0.0210 - acc: 0.9962\n","Epoch 12/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0212 - acc: 0.9962\n","Epoch 13/20\n","20915/20915 [==============================] - 1s 50us/step - loss: 0.0217 - acc: 0.9960\n","Epoch 14/20\n","20915/20915 [==============================] - 1s 48us/step - loss: 0.0211 - acc: 0.9966\n","Epoch 15/20\n","20915/20915 [==============================] - 1s 52us/step - loss: 0.0217 - acc: 0.9962\n","Epoch 16/20\n","20915/20915 [==============================] - 1s 49us/step - loss: 0.0224 - acc: 0.9963\n","Epoch 17/20\n","20915/20915 [==============================] - 1s 52us/step - loss: 0.0222 - acc: 0.9964\n","Epoch 18/20\n","20915/20915 [==============================] - 1s 52us/step - loss: 0.0218 - acc: 0.9966\n","Epoch 19/20\n","20915/20915 [==============================] - 1s 49us/step - loss: 0.0230 - acc: 0.9964\n","Epoch 20/20\n","20915/20915 [==============================] - 1s 51us/step - loss: 0.0230 - acc: 0.9964\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"uduhdNw59_K6","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}