Machine Learning
{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# BCS315 Midterm Exam" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# 1. Conduct Lab03A Linear Regression operation by the following procedure." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Import numpy, pandas, and statsmodels.api." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd \n", "import statsmodels.api as sm" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Read “Advertising.csv” data and save them to df with the first column as the index of df. \n", "# Display head data set." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>TV</th>\n", " <th>Radio</th>\n", " <th>Newspaper</th>\n", " <th>Sales</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>230.1</td>\n", " <td>37.8</td>\n", " <td>69.2</td>\n", " <td>22.1</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>44.5</td>\n", " <td>39.3</td>\n", " <td>45.1</td>\n", " <td>10.4</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>17.2</td>\n", " <td>45.9</td>\n", " <td>69.3</td>\n", " <td>9.3</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>151.5</td>\n", " <td>41.3</td>\n", " <td>58.5</td>\n", " <td>18.5</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>180.8</td>\n", " <td>10.8</td>\n", 
<td>58.4</td>\n", " <td>12.9</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " TV Radio Newspaper Sales\n", "1 230.1 37.8 69.2 22.1\n", "2 44.5 39.3 45.1 10.4\n", "3 17.2 45.9 69.3 9.3\n", "4 151.5 41.3 58.5 18.5\n", "5 180.8 10.8 58.4 12.9" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('Advertising.csv', index_col=0)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ " # Import seaborn and matplotlib.pyplot." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "import seaborn as sns\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# a) Draw a regplot diagram between TV as x-axis and Sales as y-axis. " ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEGCAYAAABiq/5QAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO29e3xb5ZWv/7xbkiVf5NiJ7dxDEgiESwmE0MOtwUC4QyklpYSEds7QSc6000470w5MO+3p0M7vA22nPaennGnSltOLIS0EWmgL4RIwhjYBkpBwSyDgJOTuOPFFtqzb1vv7Y0u2rFiyJOtqrYdPkLz31tZ699767nevd71rKa01giAIQvlgFNoAQRAEIb+I8AuCIJQZIvyCIAhlhgi/IAhCmSHCLwiCUGbYC21AKjQ0NOjZs2cX2gxBEISSYsuWLZ1a68b45SUh/LNnz2bz5s2FNkMQBKGkUErtHWm5uHoEQRDKDBF+QRCEMkOEXxAEocwQ4RcEQSgzRPgFQRDKjJKI6hEEQShmWnd2sLqtnX1dXmbWV7Fq8Vya5zcV2qyESI9fEARhDLTu7OBbT7xNh8dHXaWDDo+Pbz3xNq07OwptWkJE+AVBEMbA6rZ2HDZFVYUdpaxXh02xuq290KYlRIRfEARhDOzr8lLpsA1bVumwsb/LWyCLRkeEXxAEYQzMrK9iIGgOWzYQNJlRX1Ugi0ZHhF8QBGEMrFo8l6Cp8QZCaG29Bk3NqsVzC21aQkT4BUEQxkDz/Cbu+fiZNLld9AwEaXK7uOfjZxZ1VI+EcwqCIIyR5vlNKQt9MYR+So9fEAQhTxRL6KcIvyAIQp4oltBPEX5BEIQ8USyhnyL8giAIeaJYQj9lcFcQBCGHxA7mup12egaCgNXTHwiaBQn9FOEXBKFkyXWEzFj3Hx3MddgUdZUOBoImCnAYisO9PgKh8DAff76ie8TVIwhCSZLrCJls7H+kwdzaSsfg+0a3
k6kTKvMe3SPCLwhCSZLrCJls7D/RYG57Z39Bo3tyJvxKqZlKqReUUjuUUm8rpf4xsvzbSqkDSqltkX/X5coGQRDGL7mOkMnG/hMN5kb3NZZ9j4Vc9vhDwD9rrU8HLgC+oJQ6I7LuR1rrcyL/nsyhDYIgjFNyHSGTjf0nyuMzZ1Jho3tyJvxa60Na662R9x5gBzA9V98nCELx0bqzg2VrNnHJfc+zbM2mrPqwc50cLRv7T5TH5+5rTy9oYjeltc79lyg1G2gDzgL+CfgboBfYjPVU0DXCZ1YCKwFmzZp13t69e3NupyAI2SM2oiU2dDGbCcyiUTf7u7zMyGFUTy72n2vbAZRSW7TWi05YnmvhV0rVAC8C/6G1fkwpNRnoBDTwHWCq1vpvk+1j0aJFevPmzTm1UxCE7LJszSY6PD6qKoaixr2BEE1uF2tXXlBAy8qHRMKf0zh+pZQDeBR4UGv9GIDW+kjM+p8Bf8qlDYIgFIZ9XV7qKh3DlhV7ZapsUgxZOBORM+FXSingF8AOrfUPY5ZP1Vofivx5M/BWrmwQBKFwzKyvOqHHX6yVqbIt0vETt6Jx+veQv0laychlVM/FwB3A5XGhm99TSr2plHoDuAz4Sg5tEAShQJRKZapcTAQrliycichZj19r/TKgRlgl4ZuCUAY0z2/iHsj5AOZYiRVpgKoKO95AiNVt7RnbWuxuLsnVIwhC2qTqGkmnMlWhyIVIj9XNlevxAUnZIAhCWvH2xVJFKlvkYiLYWNxc+Ti+IvyCUOakKzTF7r9Ol1yMRYylAHs+jq+4egShzEnXx13s/ut0ydVYRKZurnwcXxF+QShz0hWaUgrTTJViGovIx/EVV48glDnp+rgLGaaZy9w/xUI+jq8IvyCUOekKzVj812NhvA0qJyIfxzcvSdrGiuTqEYTcko+EYWNFcv+kT0Fy9QiCUBoUk487EeNtULmQiKtHEISSINeFV8oJ6fELgpCQdGeQ5nLG6arFc/nWE2/jDYSG5fcvttw/pYD4+AWhTBlNpNMtpDIeCq+MN8THLwjCIKmkDU53Yld0+5Cp2d3TT8AMYzMU9z61I2viXApjEaWACL8glCGpiHq6g6n7urzYFBzs8WGgsClFOKzZdbSP1p0dBXERFXMxlEIig7uCUIbs6/JS6bANWxYv6ukOps6sr+KIx4+BwjAUSln/HIaRNM9MruLzyyXuPxNE+AWhDElF1NOd2BXdPvpfWGu0hsm1zqQhl7lKSjbeksllExF+QShDUhH1dGeQNs9v4tSmGgylMMMau6GYVufCbjOShlym8vQRTyqpGzLZb7kgPn5BKENSzUiZymBqrB/d7bRT47IzodKRcshluknJUq1nOx6TyaWE1vD669DSknATEX5BKFOyESETL8IDQRMFOAxFz0Bw1JDL1p0ddHsD7DnmxWFTTHY7sduMpDeLVKONyi7uf88eeOghS/B37Ei6qQi/IAgZM5IIA9RXO1n/leT5c2JvGjPqXBzp9bO/e4B5jTV88/ozEt4sUo02KpWav1EyikDq6oJHHrHE/qWXhq+bPx927hzxYyL8glDiFDJkcSz5c+JvGrWVFXgDIeqrnUntT8eFUypx/6m6rwDw+eDJJy2x//OfIRAYWjd5Mtx+O6xYAeeeC8bIw7gyuCsIJUyhQxbHkj8n08HXQtYDyBWjRiCFw9DWBitXwtSpcMst8PvfW6JfXQ133AHr18P+/fDDH8LChaBUwu8T4ReEEqbQIYtjEeFMbxqFqgeQSxLdBO0734Gvfx3mzIFLL4Wf/Qy6u8Fmg2uusXr9R47Ar38NV18N9tScOOLqEYQSptCpisfiRx/L4GupuHBSJdZ9NbGnk49teY5LX3uGeQd2Dd/w/PMtN86nP225dTJEhF8QSph4f3fvQJAjHh9aW4VL8uHvz1SE071pFFv6hWza8/nzmmj73s9YsvUZFn2wDZsOD62cM8cS++XL4bTTsmK7ZOcUhBImdlAwZIY50O0D
YHpk4lS2s2MWinxk/sy7PcEgPP00PPggPP44DAwMruqudPP8Ry7lufOu4tYv3krz6Zn17iU7pyCMQ2J7zVs/7MJuKKZMcOF2We6fZNk0S4l0M4UWrT1aw6uvWr753/4WOjsHVwUcFfzljIt46aPX8PrpHyVkd1j7fGl3xsKfCBF+QShxoq6WS+57nrpKByommmO8pCgo9FjGmO15/32rZ9/SYr2PohRcdhksX86N+xpw1Nfn5fyJ8AvCOGE8pyhIp21j8b2n+tmU7Dl6FH73O0vwN20avoOzz7Z89rffDjNmADBxhGLyuTp/Es4pCOOEYoxvTyWZWiqk2raxzGtI57OJ7Pn786dYYn/jjTBtGnzxi0OiP306fO1rsH279e9f/mVQ9NNpYzbI2eCuUmom8GtgChAG1mit/7dSaiLwO2A2sAe4VWvdlWxfMrgrCKlRTKUJsz0gm0rblo3Qa/YGQjS5XaxdmTyFRLqfjdpz8JiHqzp2cOeevzJlw5Pg8QxtVFsLS5daUTmLF1vx92NsYzoUYnA3BPyz1nqrUsoNbFFKPQv8DbBBa32vUupu4G7grhzaIQhlQzHFt2d7QDaVto3me0/myknLb681zb6DNL+3zkqMdujQ0DqHA6691hL7G26AysqstjEb5Ez4tdaHgEOR9x6l1A5gOnAT0BzZ7FdAKyL8gjDuSEdIsxUTn8z3Plo+nJT89h9+OJQB8+23h3/5xRdbYv+pT8GkSWnbnk/yMrirlJoNnAu8AkyO3BTQWh9SSo14dpVSK4GVALNmzcqHmYJQEIptYlK2mFlfxZ5jffQOhAiYYSpsBrWVdmZPqhm2XVoJykYh2Wzg0Z5AEn32C+dMgp//3BL7F18c/oWnnWaJ/e23w9zSyRWUc+FXStUAjwJf1lr3qiSJg2LRWq8B1oDl48+dhYJQOLIpesVC9Eb25oEu+vxhbAbYDUXADNPhCbDs/InDts+mSyjZbOB/e/ytpE8gsZ89crSb6/dv4zMfvEzjN58dngGzqQmWLbOichYtSpoMrVjJqfArpRxYov+g1vqxyOIjSqmpkd7+VEAqHwtlS7FNTBorsTcyrRWGshJLhgCX3cDtsrOx/ThfivlMtmP0E/nJR3XlhMM0H32X5i0t8PDDVjK0KFVVcPPNVu9+yZKUk6EVKzmzXlld+18AO7TWP4xZ9QTwWeDeyOvjubJBENKhEC6XQkxMymU7Y29kUfeOxurxz22sQWt9QtvyNf8gkSvnK9ND8I1vWPH2e/cOfcAw4MorLbH/xCegpibxzkuMXN62LgbuAN5USm2LLPs6luA/rJS6E/gQ+FQObRCElCiUyyXfk65y3c7YG1mFzSBkapQBAdNKOjZS2/JVIjHWlTOwdx+f3fUyN7/divudN4ZveN55ltjfdhtMmZJVG4qFXEb1vAwkcn5dkavvFYRMKJTLJZHoXTh3IsvWbMp6rzyddmbyZBB7I2uocXKwZwDCVg3eRBOSUsnSmY4tCbft66P5tadpfrQFnnvO8kFFmT17KAPm/PmpH9ASpbQdVYKQJQqVC2Yk0btw7kTWbT2Qk155qu3M9Mkg9kbmdtmZFKqgyxukymmnye1KKNjJ4tfTsSV+287ufv78vQc4/ehrTH5+PXhj2llfb+W1X7ECLrwwYZnC8YgIvyBQ2Dw38aK3bM2mnD19pNrOTJ+A4m9kcxpquHeMTyvp2LK6rR2HAQsO7aJ58zN8bOsG6j0xiQGcTiudwooV1iSrioqM7SplRPgFgbH5mbM9WJrLp49U2zkWG7I9+zRlW9rbuXzdaq7dvoEZHfuGrXp9zgLO/fo/WOkT6uqyZlupIsIvCGReQjAXg6W5fPpItZ2j2ZDPCKiktnR2WqGXLS2wcSN/F/O5vVPn8MKiq1m/4DLUzFms/VzyXD3lhFTgEoQxMJakYIkoRLWpeCGPHWeItwHIq33xx8P0evnom3/li4dfoaFtA4RCg9v6m6bw2PzFvHDelRw86VQG
QuFxU4UsE6QClyDkgFy4ZcZSwDwTRnpqWbf1AEsXTmdj+/ETbMjlGMRINM9v4h7T5OVfPMq5bX/msrfbqPLFHF+3G265BVaswNnczNRdx/C0tdNTBBlKixURfqGsyLaLIlO3zGh25DPLZqLB043tx0d8aknlZpe14/zGG9DSQvNDD9F84MDQcrvdGpxdvtwarK0aOt7FlKG0WCmf+CWh7BlLkY5EZFI8Ixd2jIV9XV4qHcPzxCd7aplZX8VA0By2LN7/P6b27dsH3/ueVaVqwQL4/vchKvoXXAD332+lQX7iCSscs6r0K4zlGxF+oWyI7dkqZb06bIrVbe0Z77N5fhP3fPxMmtwuegaCNLldo/qTc2HHWBhNyOMZ7WaXUft6euCBB+Dyy+Gkk+Cuu+DNN611p5wC//7vVq3ajRvh85+HhoastL1cEVePUDbkKkwyXddCsRUOTzeUdbQxiJTbFwjA+vVWRM4TT4DfP7SusdFKmbBiBZx/flYyYI7X9NeZIMIvlA3FUoy8WOyIkslgcrKbXdL2aW312ltarNq0x48PfbCy0kqGtnw5XHWVVckqS4zH9NdjQYRfKBvylQysVOyIZawDorG96ZoKG70DQYDB9k09tJfvvr8dTr4ddu8e+qBhwBVXWD37m2+2InRyQCYzkcfzE4IIv1A25CJMMhNxyHe4ZqZ2prPv2N70QNBEA03ebi549VluePMFTt67Y/iHzj13KAPmtGlZsSMZ6brXxvsTggi/UFaM1LPNVBTHIg6Z9rAzsTXXIhbtTYdMzeGjx7h0x1+46e0XuHj369hiM2DOmmWVKFyxAs48c8zfmw7putfGW4GceET4hbJmLKKYb3HI1NZc2/nBwS4+8u5mrn/zBa5+byPVQd/gumDtBByfvtUS+0suKVgGzHTda8U2AJ9tRPiFsmYsojgWccik556prTkRMa1hyxb2/3gNTz72CA39Q2UK/TY7L5zyUf501uX0XHYlv/nC4sy/J0uk614rtgH4bCPCLxQt+RhcG4sojmXWbiY990xtzaqI7d4NDz1kReXs3MmMmFWvzDyLP5zZzJOnXUKvq4ZZEysJ94US7irfpONeK8YB+Gwiwi8UJfkaXBuLKGYqDpn23DO1dTQ7R7rBRu3c1+XldEeQr/W+wakbnoCXXx62791Ns2g7/2p+Necidlc3AlbIvU0p7DaDJrcrqW3FSiEG4POJCL9QlOTLfz6Wnl2m4pBpzz1TW5PZOdIN9mvrtuMIBrh692a+tO05Fr2zCYcZ03OfOhWWLYMVK/j6qz46+vyYpsbeM4CBQqMxlCr5HvJ4zvkjwi8UJfkaXBtrzy4Tcci05z4WWxPZOSy9QjjMR/e+ycK2P3H1zr9Q6+8f3M7rrGTzuc0s/s4/wWWXgc3K7bOqcujGMW2CiyMePyETTm6s5q5r5o9b4Sx1RPiFoiSfg2v57tmN9Skj29WtFnTt47Itz3Lplmdp7BpKpGYaNrbOP58XF13FxrMupiNs56Ull59gT+zN6NyZ9ePKJTJeEeEXipLxPLhWFP7jAwdg7VpafrKG2ft3DVu1bdpp/Okjl7Fz8XX0uOsBq7jMjAkj++vHs0skU4p91q8Iv1CUNM9vYun+bn7+8m76AybVFTY+d8mcovrxjIWCiGVvLzz2mBWR8/zzoDWzI6sOTJrGi+dfxfoFV7CjZjIamOB0UKn1uLrp5oNSmPUrwi8UJa07O1i39QCNbiezIj3+dVsPcPaMupR/PMXe68oLwSA8/bQl9o8/Dr6hyVVMmgS33cbWi6/l+70T2d89wIz6Kr4fE9Uz0hNJIY9rKZzTZIEJ0fWFtl9q7gpFyVhr2Raibm3RoDVs2gQPPgi//S0cOza0zuWCm26yMmBec03aGTALeVxbd3bw1XXb6fOHMMMam6Gocdr5wdIFRXVOL7nveeoqHaiYVNJaaw73DFDldOT12CWquSuFWISiJN2qUPEUW7GTvLBrF3z72zBvHlx0kVWp
6tgxwkrx5vxF7PyP/wVHjlg3gxtvzCjtcSGP671P7aDbG0SHrXkCOgzd3iD3PrVj9A/nkUSFbQKmLpprUlw9QlEy1qiebISDloJbgY4OK699Swu8+uqwVTua5vLceUvYfMHV7K+eaPUuD/porq3N+OuyHWabzjHefcyLocAwrJ60UqDDmt3Hiit/TqLAhAq7MabOTDZJW/iVUgZQo7XuzYE9wjhhrKI51qiesd44inqAzuu1/PUtLZb/3hzqXR6Z0MSG85bwu3kf451Js1AoplW6cGdpAlw2w2yL+hiPgURRW6vb2osm/09Kwq+Uegj4H4AJbAEmKKV+qLX+fi6NE0qTbPygMw15jN5wdnV48PhC1Fc5aKhxpn3jKLq0vKZpReK0tFiROX19Q+smTIBPfYp7JpxL2+T5VLoq2Hm413KHAEc9ftwuB5UOG7s6PCxbs6lgN+RY0j3Gcxuq2dXRh9La6u1rCGuY11id9nfnmkRRW8USopxqj/8MrXWvUmo58CRwF9YNIKHwK6UeAG4AOrTWZ0WWfRv4O+BoZLOva62fzNB2oUjJlmimG/IYe8OZUuvCYfNzvD9IyAwzb3JtWiJXFGl5tYZt2yyxX7sWDh0aWudwwPXXW4O0N9wALhfP3Pc8dU7L5gqbQcjUKAMCppUT/1i/H48vRIfHl/cb8kike4zvumY+X1u3HY8vRMgMYzcM6qsc3HXN/LS/uxAUxfyNCKkKv0Mp5QA+AfxEax1USo0WDvRL4CfAr+OW/0hr/YP0zBRKiUKJZvwNp6HGRVWFPeVIoFhiXRoeX5CjHj++kEl1hZ3WnR25/bHu3TuUAfOdd4at2j77LDZfdC2nffFOLrngtIQ2N9Q42dflRUe8QLs6PITMMA01zrzfkBORrtuoeX4T31+6oCiEM13iXZ/fuemsgtqdqvCvBvYA24E2pdRJQFIfv9a6TSk1eyzGCaVJoXKZZ/OGE3VpdPb56PQEQIGhFFUVttz4obu6YN06S+zb2oat8s45hYfmfYwXzruSnikzLBdB20HuqasfZkOsGwY0CggDNgVE3CJO+/BAvkIWF7lw7kTub/0AM6xx2g3cLjsVdltS10epzBKOr0F8rD9AbaWjaMYyUgrn1Fr/WGs9XWt9nbbYC1yW4Xf+g1LqDaXUA0qp+gz3IWSZ1p0dLFuziUvue55lazbRurNj9A8lYNXiuQRNjTcQQmvrNR++zERhdJnccJrnN3HPx8+k32/Vj62wGUybUEmj25W9EDy/H37/e7jlFpgyBVauHBL9yZPhy1+GzZu5865fs/aqz9A7dWbSMMCozU1uF4d7/dgMhctuYBgKm6GwG4ojHv+wzxRqcDE6QW9itYMKm8IXMunyBlm6cHpJCHsyoi7HqEttz3EvXd4gZlgXPIwzSqqDu5OB/w+YprW+Vil1BnAh8Is0v++/gO8AOvL6n8DfJvjOlcBKgFmzZqX5NUI6jDQY+9V122msceLxh9IeBCyULzPb+X2a5zdRW+lg1sSqYZNxQmaYrR92ccl9z6c/QBoOWzntH3wQHn4YuocqV1FVBZ/8pFWm8IorwG79PPc993zKTzLRHvGi7z5LjzeIVmAzFKGwxgxrQmFdFIOLUbfchEoXDTVWDiBvIMTG9uN8Ke/WZJd4l6MZ1hhqaJAdCl/GMVVXzy+B/wd8I/L3e8DvSFP4tdZHou+VUj8D/pRk2zXAGrBm7qbzPUJ6xF+oIVPT7Q3S5wtxSlNNxoOA+e655eKGE++28viCHOj2YU83Yumddyw3zkMPWT78CGFl8Oq889h8yfUs+Pxn+Nh5cwbXRd0FRz1+Ovv8THa7qI3cAEbrqQdC4UH3FFgx74ahcESKoxTaR14Ug+c5Ir5tFTaDoBkeHGSHwpdxTFX4G7TWDyul/hVAax1SSpmjfSgepdRUrXU0NOFm4K109yFkn/gLtbPPj6HA1EOPpgUNZUyDbN9w4p8iDvdYuW4mu12jH5uDB61Zsi0t8Prr
w1b1nnUOv5p9ES+edwW+iY0MBE0e3rCHe6qrTyiQMqXWyYFuHwe6BwCN3WaM2lN32BQDQQiHh0IfAaocRtoD3blgPNe0jW9bQ42TA90D2A2FLpKkd6kKf79SahKWiwal1AVAT7IPKKXWAs1Ag1JqP/A/gWal1DmR/ewBVmVmtpBN4i/UgBlGYfVUohR7byxXs2zjnyI0ML1uqOcNccfG47Hi7B98EDZssFw7UWbPtsIvV6xgVVv34DFXnBhhE/8UppTicI+Pw71+Fs4aPef9qZNr2d3Zh8cXImCGqbAZuF0O5jTUjPmYZIPxnHY7vm12m6KuykFjjZOegWBRRCOlKvz/BDwBnKyU+gvQCCxN9gGt9bIRFqc7JiDkgfgL1WYoQqam0e0c3CYXvbFsiXWuZ4DGPkVEk8fFEvD5uXH/dlj2M2tG7cDA0MqJE+HWW+GOO+DCCy2fC7Dv8eR++5FcIYYCv5ma1zN6TqdMsBelsBZTTHu2Galt37z+jKJqW0rCr7XeqpS6FDgNUMC7WutgTi0T8kb8hTp7YhXH+gPYcvhomk2xzucs28GbpD/IgoPv8bFXn+aybc9T1x/zAOx0WknQ7rjDyoBZUXHCfkZzdcTPIzjY7UOjcdmNlI5VKQhrqYRmZkKxty2p8CulPplg1alKKbTWj+XAJqEAxF+o0d54rkQjm2Kdz4HCZnsvv9r3FK6H1zK1Y//QCqWgudmKyLnlFiuNQhJGc3XEru/otUQfGJyAlcqxKnbxEQrHaD3+G5Os04AI/zgl16KRTbHO+UDh0aNW6GVLC2zaxJzYdWefbYn9smUwY0bKuxytRx67fs8xLy67QUONc3BsodjHXITiJqnwa63/e74MEfJDsaQazqZY52Sg0OuFP/7REvv16yEUGlo3fbo1SLt8uSX8GTLazTW6fqSiNOMlAkYoDCmnZVZKXQ+cCQxWXNZa35MLo4TcUExpcLMp1lnzZ5smtLZy6P6fM+GpJ6jyxfSoa2stF84dd8DixWCzJd5PlhmPETCJOiDF0jEZ76RUelEp9VOgCitNw8+xInpe1VrfmVvzLKT0YnYYaznDWLLxA831OEJKaA3bt1vhlw89ZMXeRwgZNl6d/99Yf+4Slnz1b1l8zkn5tS2GojhWWSJR+calC6ezbuuB8iyXmSMSlV5MVfjf0FqfHfNaAzymtb4qF8bGI8KfHRLVAu0ZCPLSXZenvJ9iq2eb0U1o3z5L6H/zG3j77WGrts44ndbzr2LL+UvoranL+OYojEyiDshRj59GtzNpx0SeCNIjkfCn6uqJBiZ7lVLTgOMwfIxLKH6y5VcvpiIlabmvurutDJgPPgitrcNWfdgwg+cWXUXLnIvYXz8VUEyzu3AjA6nZJtHAfn/AZFaS0oTF5KosdVIV/j8ppeqA72EVYAHL5SOUENnyFcf+cKO56v0hk/1dA7nPVR/HqDchvx+eesoapP3Tn6y/IxyvqeOVj17FU2dfztvTT6XK6eDI0T6UqSEmqVYxD6Tmqwecze9J1AGprrCuyUQdk2LqcJQ6o8Xxnw/s01p/J/J3DfAmsBP4Ue7NE7JJtgZBoz9cM6w52O2zEoAphVLkvQc2Uu+xyqaof/0V+B+/tMIwu7oG15mVlWyYfzEvLFzC22d+lP6wYs+xfmaEh+LkD/YMoDT4Q+G8pZROlVgBdjvtHO3zMyHHed6z3dNO1AH53CVzWLf1QMKOyXhN7FYI99VoPf7VwBIApdRi4F7gi8A5WJkzk6ZtEIqPZCGEqV6A0R9uR68P0KCt2q6T3S7skTzj+RL+2N7jjCN7aX7tGRa/9gxTuw4PbWQYsGQJ3HEHdx6fxr6gMdRrBByGwRGPn9rKisE4+SMeH0ormtyunP4Q0/nRxwvw+x19hMKa6go7qiJ3yfSy3dNO1gE5e0Zdwo5JPhO75fNJqhDuq9GE36a1Ph55/2lgjdb6UeBRpdS2nFkl5IRkF3M6F2D0h7uqZQsaKxNkQ42VuExr
ndce2BdPr2HL937DVdue47T97w1fuXChFX55221WoRPg/fuep65yuB95cq2T/d2+YUm1mtyunA9Wp/ujPyHPu7byvHf2+XM6sSsXPe1EHZBEy1t3dqLEZ8oAACAASURBVNDV72fPsX4chsHkWmdKWUozIZ9iXCj31ajCr5Sya61DwBVECqOk+FmhiBjtYk73Amye38TCWfU57YElvFH19cEf/gAtLVz07LNcFJMB8+ikKXiX3sZJ/7gSTj/9hH2O1Gu02wxObaqhrqoir+GS6R7zQuV5L3QK5dhrd0ZdJUc8fvZ3+zi1qYZvXj+/pMW4UO6r0cR7LfCiUqoTK7LnJQCl1CmMkpZZKC5Gu5gzuQBzObEo/kbV2d3Pn77//zj96GtM3vCUNbM2Sn29lQFz+XIaL77Ycu2kaXOsgERvOP/2+Fs5fcxP95jHC3Cj28n+rgHsttzmeV+1eC5fXbedA90DmGGNzVDUOO188/ozEn4mm66S2Gu3dyCITSlC6Eh9guyTTzEu1E11tJQN/6GU2gBMBZ7RQ0H/BpavXygRRruYM7kAc5kBcnVbOw4DFhzaxaWbn2Xx1ueo9wwN0lJRYWXAXL4crrvOyoiZAqPZnM/H/HSP+Ujps+urHEyqrsh5nncF1nCOtsZ0VJJts30Mo9du70CQgz0DGChsBvQHQjk5N/kU40LNyh7VXaO13jTCsvdG2lYoXka7mKMXYGefjx5vEL8Zxm4Y3LRgWtL95iSZW3s7l69bzbXbNzCjY9+wVa/PWcC5//oF+NSnoK4uo90ns7kgKZ5T/NEXKs/76rZ2aisdTJlQObgs2THJ9jGMXrudfX4MFIahCGtw2Y3BouXZOgb5HEuAwqXPFj99mTCayDTPb2Lp/m7ub/0AM6xx2W24XXbWbT3A2TPqch+lc+zYUAbMv/6Vv4tZ9eGU2aw/dwlrT76EgxOaWKjruXBzBxvbd2Y96iKvKZ4z+NEXItVyusckW8cw6i7a1eHB4wsRDIVx2C3R19pydWXz3OR7LCFKIc6pCP84IBV/aiois7H9OBOrHfQOWOX6PL4QtZXkLsJgYMCaVPXgg/DkkxAcqu3jb5zM7+cv5vlFV/Hu5Dkc6LEmXk2vdbK7s49X9xynyV3BpGpnVt0x+fa5xv7oo+fxnx/ZRtDUVNgN5jW5c94DHO36SfeYjLT9sX4//X6TS+57PqUb9fCawy4cNj+He/wEQpqqCoNGtxO3y4E3EMrauYl/UqmtrMAbCFFXVTHq8S+1VBIi/EVMKhdTumGYyS7GXR0eerxBDENZ5RfDmk5PgKDpyV6jwmF48UWrZ79uHfT2Dq2rqYGlS2HFCpzNzUzZdQxPWzuHP+zCblNMdlsho+1H+zAU9A6EaKhxUVVhp7PPx5d++zq1lY4x/fAK5XONnsdAyKTXZ6WAHgiY7DnWl9O47lSun3SPSfz2x/r9dHgCNNZUYFPw+r4u7vz1Zk5tquGua0buSceLcEONlRT4eH+QKRNcVDpsWZ9cl+mTSimmkkgc/iAUlOjF1OHxDbuYWnd2DNsu9geilPUa9XumSyAUhugsXBSGUqAiy8fKm2/CXXfBSSfB5ZfDAw9Yom+3ww03wG9/C0eO0HrXfSz7oJpLfvAiq9vaWbV4Lo1uJ6c01gzGqgfMMIZiMIzR4wvS6QngDZhJj1UqNM9v4p6Pn0mT20XPQDAv8fwwdB49vhAGCrthYBiK3oFQxuczne9Ndv2ke0zit+/3m7idNrq8QfYeH8AfDIPW7O7sT3ie9nV5qYzL2zOp2onbZc/ZuZlZX8VA0By2LJWnvWz+BvOF9PiLlFQHyDLppSR6kgjrMCFTEzRNFGAzrDQMFbZkMRxJ2L8fHnqIvl/8kpr3dgxfd8EFVkTOpz8NjY2Ddo3Uc6qJy+FSYTMImGEqbFa/5ajHDwqcNmPwhzfWmaWF8qMHzDC2SPZUFbm55TKuO9W8S+kek9jtz/vOM/T6QoQiheI1YGrQZjjh4Gwi
99K8JnfOsqRm+rRXiqkkRPiLlFQvpnT9r4nEden+boKmJTYKCGsww5oJlXbmTa5N3fCeHnj0UcuV09oKWlMTWXWwYTrPnruE9ecsYdXfXnXCjz3RzU4pRdAMD/4g3S47R/sC1Fba0VrjC5kYStFQMxTSmeoPr1h8s9HzWGEzCJkapawBzAqbkdMxhnzkXQqaQ6nfVeR/0cDwROepEC63TCNsCj3BLRNE+IuUVC+mdH8gicT15y/vZmK1g2N9QUv8lSX8A8Hw6D+2QMAqT9jSAk88MSwDZm9NHRsWXMbL/+1q3jvpDFAqYW880c2uZyDId246a/AHOaehhts/OpGN7cfZ3+WlusJOVYVt0BWU6FjFU0y+2eh5dLvsHOsPEA5r0FBb7cip4OUj71KF3WAgYFqqryP/AKVUwvNUqDDHTJ72SrFCmgh/AUill5nqxZTuD2QkcQ2ZYXp9IQaCJjZlzQI1NTgMRZXTPvK+tIaNGy2x/93v4PjxoXUuF3ziE7BiBTdud+CuqRxW/CVRLy/ZzW6k6Jfo8fvUeROTZnVMdNy7vYGiSfMbex5DZi+BSFTP7Ek1ORW8fORdmtfkZs+xPrr6g/hCYRRgN8BuU6POWyjWwdFYCnWTGgsi/Hnmx8+9x/2tHxAIhVEKDnUPsPXDLr7QfDJfWnLq4HbpXEzp/EDixbV3IMiBbh+KSIdMQxiYNqFyMFnZMN591wq/bGmB3buHlhsGXHGF5be/+WarRi0w7UDqhcJTudmN1Etft/UASxdOH3wCGOlYjfS5Pce8zKgb3r5C+mYLJXS5zrsUPa/T6+2EzDBHev0Ew2FmT6zi7mtPL2qBTJVSuUlFEeHPI607O7i/9QOCZth64o26PsNh7m/94ISJUrm4mOLF9YjHB0CT20mXNwgKlLbSEkdTEnPkiBV109IC8SUwzz0XVqywMmBOO3GW70hi3jsQxGGoE2K6U7nZJXJVbWw/nnTQb6TPOWyKI71WOuYoxe6bzRW5dFfEn9dzZ9UXfY94vCPCn0dWt7VjhmMGuiI+T60hFA7nxcUQ/yPUGqbXuaitrKCywhaJ6ghT5ffz09rdnPGV78Gzz4IZE+Y2axbcfjuvXnQtPzpUYRUFefh9tN5FX8BMKuY1TjsaCIb1iH710W52mUZQjPS5yW4n+7sHSso3myuiM7d//vJu+gNWNazPXTIna9djqfWIxzsi/HlkX5cXp92gPxAjosqKoKmyG+zq8LBszaacR5hE97m6rZ0Oj58jvX6UUkxwKBZ3vs3HXnmaj739Mi5/TPbDujorP84dd8DFF9P6XmfEdeLDpmBXRx9g3USSifmyNZsImOFhPe+jntQnX2UaQZEoHfO8xhrqq5058c0WS8RQKrTu7GDd1gM0up3MitwE85auQ8g7Ivx5ZGZ9FSEzjDdgWoEN0egGwOWw4fGFTpiwlYsIk2HT4d0V1O18i0888wIf39HGpL64DJjXX2+JfVwGzFjXSfvRPmyGAg2dfQHmNtakHLnTOxC0oli0ZtbEqlHbnalLInE65hOTnLXu7OC+9Ttp7+wHYM6k9H3RxRQxlApSz7a8kJm7eWTV4rlU2G1MqLQPi2yrr3LQHzCpr3LkZfbf6rZ2ZnYf5m9eeIiWH93J47/8Mv/9tccHRb970YWwZg0cPgyPPWYN1salPY6dWRkww4MhoNHZtMkid2JnR3b2WaGfLrstpXZnOrM21c+17uzga+u2s6ujD601WmveP9rPV9dtT2smcKnN5hxppmyxT0ISMidnPX6l1APADUCH1vqsyLKJwO+A2cAe4FatdVeifYw3Yv3du44MhezNa3Lz5oGuSI/fqm+jlMJlN+jxBrJnwPHj8Mgj3H3vT1iw561hqz6cfBJPnbOEVT/9N+pmzx51V7GukwqbQSgSdx6dTZtq5I4/Et7X6E598lWm/uJUPre6rR2PL4TNiKSsAFRY0+cfufebyJ1TarM5S3ESkpA5uXT1/BL4CfDrmGV3Axu0
1vcqpe6O/H1XDm0oOkYSn9adHaxq2ULQNBkc+9UaX9AkFNbDps6njc8Hf/6zFZHz5z9DMMiCyKrjtRN58bwreXHRlbzZNBeHzaD1mcPs62of1ScdK+ANNRUc6Laig6bUOJMmz4of7K2qsFHttOF2pTf5Klfs6/ISCoex24YehqOT2eJFO5k7p9SEtBQnIQmZkzPh11q3KaVmxy2+CWiOvP8V0EqZCf9IrG5rZ2K1g8OR1MPRaJ+whonVjvT9rOEwvPSSJfaPPGKlUYhSU8Phy6/le43n8+ZpC3E6KxgImvQMBFEkjraJJ17A5zXVoLWmP2AOhoEmS+IVOxmrmARnZn0VnR4/WkfOA1bUlc1QJ4h2Mr94qQlpKU5CEjIn34O7k7XWhwC01oeUUgmvKqXUSiLF3WfNmpUn8wrDvi5vJK/80FT9aM6USZGIk1R47YkXOfh/f8EFm55mcs+QP9o0bOw772L+ePYVPH7SeTROnsSFcydyKGbCUzTxWTqDe8lcJ607O4ZFKF0410qxEO8SKTbBWbV4Ll9bt50ubxCtrMevsIY6p+ME0U7mzim2dqWChFyWD0Ub1aO1XgOsAVi0aJEeZfOCkcjHm04o38z6KnZ3RgYTASMi+k57Cgm6Dh6EtWvx/OKXnL9juN9++9RTaT3/KtafuZj3qB5WuGTd1gPDBjcvue/5rPmkY10gNgVb9h5nY/sxiLSts8/PV9dt5wdLF6QUu59Pmuc38f2lC4ZF9ZzSMHJUz2junGJqlyDEkm/hP6KUmhrp7U8F0k+YXkQky3S5buuBlEP5Lpw7kVf3DOW6CWsIa02tPUGtz95e+P3vLVfOhg2gNe7IqkOTpvH7M5p5/Mxm9kycgT2SUtkww8MKl8T35mOzNB71+AfTA89pqE77uERdIGZYc6jHPyw7Y1hDKBSm2wxy71M7ilIYUxXsUnPnCEKUfAv/E8BngXsjr4/n+fvTYrRee7JMl41uZ8puk43tVhnB3oHQYKijAkJhPdQrDwbh6aetPDmPP26VLYwyaRKPzbuYv15wNe/O+Qg7j3isXPp6KLwytnBJ70CQzj4/e455WbZmE6sWzx3m4jDU0Pcf7fOnPbgcdYHs7uy30gvHrQ8DdgW7jxVnhEuqlKI7RxAgt+Gca7EGchuUUvuB/4kl+A8rpe4EPgQ+lavvHyujTcBp3dnB1g+7MMNhnHbbYA3QSoeN/oDJrDRioqM+/mh5OQCtNT3eAM3d7fAP91gZMDs7hz7kcsFNN1lJ0a6+mkd+udVyOyg1YnhltHBJ70CQgz3WTcNpU0Pt+viZTKquwOMPYYY1FTaDhhpnRql5o08PATNsTeyKQxet4y59xJ0jlCK5jOpZlmDVFbn6zmySLGIDrCIV0QHYUKSIxbQ6K/qjOq5iFKRXnHpqxz4ufuVprt72HPzrgaENlYLLLrNm0n7yk4MZMCF5eKU/FB4sXBKdMKVQNNUOd/v0BUxOaawZlkI5k9S8UVtsSlmD1fFE0lTMa0zfjSQIwtgp2sHdQpMsYiN6U5jsdnGwZwAVyb9wuMdHU62Lz10yJ6X88FFWLZ7Lfz70F658s5WrXn+O+XvfGb7B2WdbYr9sGUyfPuI+koVXxhYueXXPcZw2S/SjsfPRdmUr9jxqy33rd/JeRx8Vkbzr0VuAXUFdlYO7rpmf1n4FQcgOSpfAc/eiRYv05vh0wDlm2ZoT88h7AyGa3K7Bm4JSit6BIId7fQRCVqrl+ZNruPva04EUfL9er1WxqqWF8NNPY4RCg6t8U6bh+uwdlivnIx/JS7uiPXWHTQ3esHoGgjTWOPH4QxklGouOk+zq8BAIhamwKeZNrhVfuCDkAaXUFq31ohOWi/CPTOvODr66bjt9EZ+3zVDUOO38YOmCSFZLSzxjfeYOQzG1rpKgqRPnjzFNeOEFKyLn0Uehr29oXW2tlQFzxQpYvNgqbjKKjamGjEa3fe9IL31+k4nVDiZVOwefRqL2RrezShraONYf
oLbSMezJJVHbSikbpSCUA4mEX1w9SVAQyZdv1SKNer6jPeOjHh8dHj9hbW07sapi5AgerWHbNisi56GH4NChoS9xOKzMl8uXw403WoO2KZBO9sfYbadOqKSzz8/x/iBBUzOvyT1MoONTKAfDOqXopFLLRikI5YwIfwJWt7VTW+lgyoTKwWVR0Vu78gKW7u/m/tYPBkXfblN0eYNUVtiocdqtAdG9ey2hb2mBd+L89hdfbIn9rbfCpElJbRmpJ51OGt34bRvdLqqddprcrqRVq9JJNCZpfQWhdBDhT8Boorex/Tgz6is53OMjZGoMQxHWGn9HJzfueYXrtm+Au7cN3+lpp1lunOXLYc6clOxI1JP2BkJMqU2tXmymmSLTGewttWyUglDOiPAnYDTRiwpdQ42TzmO9XPb+a3z87ee59P3XcJpDg7RMnmxF46xYAQsXDmX+SpGRetKdfT66vEG6vAFcMXMIEolyptE66cxMLbVslIJQzojwJyDWj+/xhfCHrMlINy2wCorPmuCi6Y3XuGb7Bi5+/QXcA0ODtGZlJbZbbrHE/oorwJ75YY7vSXt8QTo9AXRYY7MpAmaYA10DNLhNHDbbiKKcaWqBdGamSvoCQSgdJKonCT9+7j3ub/2AUDiM02YwocrBKZ37+G7fNib/8TFch/YPbmsqg9fmLaL2c5/ljL//DNTUZMWG+PDL9qN9g7NwG2qcdPZFiqNX2PjxbeeOGtWTy9QC+fgOQRBSR6J6MiDqx58+0M3iLc9x2eZnOHn/rmHbfHDS6fz5I5fxzuLruO3G87kgy0IX35P2hUwMpWiocVJb6aC20mGldxgIJhXZfKQWkPQFglAaiPAnwuPhzOf+wNfe2MCC97Zi0+HBVfvqpmDetozZX17FyaedxpfS3HU68e7x7pbqCjtVFTZqK4ujYpUgCKWHCH8swSA8+6wVfvmHP/BvMRkwuyrdrJ//Mf7wkcvYPuMMmiZUco+uHywnliqZxLsXc8UqQRBKDxF+reG119j/f9bgfvwxJniGar+HK5y0nnYBj5x+KW1zFhKyV6A1TKtzYTNOzFqZSk9+rPHukgpYEISxUr7C/8EH7P5fa3A9vJapHfuYEVkcVortc8/h6XOXcPG/rMRw1/J8yxYr/bKhBkMntdbs6vAMlhd0O+0c7fMzodKRtCefjXh38aULgjAWykv4jx6Fhx+2XDmbNhE7herdxpN4/KzLee3Cq/FNmY43EGL7652sXXkKC2fVnxCjfqzfj8cXosPjo67SwfsdfYTCmuoKO6pCJezJS7y7IAiFZvwL/8AA/PGP8JvfwPr1EJMB8+iEBtoWXckvZ1/EriknowG7oZjL8F74hXMnnhDWebw/SH2VY1DATa0H68lGB15H6slLvLsgCIVmfAq/acKLL1o9+3XrwOMZWldbC7fcwhftZ7L3jPPQNjvtR/vQpkYZQ+UJo73w1p0drNt6gPoqx+BEruP9QRw2aKhxDu7WphS+UJiAadJ+tI9GtxOboU7oyYuPXhCEQjN+hF9reOMNS+zXroUDMZWr7Ha49lqrmMkNN0BlJZ1rNtHv8VEVEfCDPQMQtlIrewOhwV746rZ2AiETjy9EwAzjtBu4XXa8AXOwypbHF7RKHWIlbAuaYfZ3DVBf5eCb159xgqnioxcEoZCUvvDv2zeUAfOtt4avu+ACS+xvvRUaGoatirpvzLDGaTci5RLDVEWyVkZ74f/8yDZ6fSG0BjOsCZom3oCJy2EQNDXeQIiOXh9KWWUX7SpSTNymmFRdIQIvCELRUZrC391tFTFpabFcOrFpJ+bNG8qAefLJI3486r6ZWO2gxxvEFzIJhQ2+0HwyX1py6rBtg6YmrDXhyPwtpayv84XCLF04nY3tx9lzzIvLbgzOpgUGZ9MKgiAUG6Uj/IEAPPWUJfZ//CP4/UPrGhutDJjLl8P554+aATMaSz+h0kVDjZXa2BsIsbH9+AmzcCvsBr0+BouwRAvHKqyUDmtX
XjBiOUOJ1BEEoVgpDeH/8EOYOhWOHx9aVlkJN99sif2VV1qVrFIknVj6eU1ujvcfA6yevlLWQK7TbgxuL5E6giCUEqUh/EePWq+GAUuWWGJ/883gdme0u3Ri6VctnsvWD7sIa41NKbSGMBq3yz64vUTqCIJQSpSG8FdVwXe/C7fdZvX8x0iqPfRoCgan3cDjD2FqTWWFDbfLQYV9eO57idQRBKFUKNt8/KPljo9NplbpsHGs3ypQ7nbamDe5Vnr0giAUPZKPPwGJbnvxydQaalxUVYxeoFwQBKHYKUvhj6+sZYbDfOuJt1m6v5uN7cfZ1+XlqMfPlFrnsM9J8XBBEMYDRqENyDetOzu4v/UDwlrjsBmYGo71BenzBbm/9YPBpGtKwYFuHx7fUCy+hGgKgjAeKKsef+vODr7029fxh8IoQKGwGYowmi5vEKXgcI+PgBnGwJqEdbjHR43TLiGagiCMG8pG+KODtf2B0ODs26AZBgwMI+Lr1xAyh8I2NRozbM3AlRBNQRDGCwURfqXUHsADmEBopFHnbBMdrHXZbfhDJiFtiX3QDGOPzMtVCgxj6L0NK6Lnpbsuz7V5giAIeaOQPf7LtNad+fqy6GzdRreTg90+7EYYU1s9f0MpKh2KUCQvT/SJAG2lbBAEQRhPlI2qzayvYiBo4nY5mFbnwm4YgykY5kyqYvakahrcFdgNhRnW2A1Fg7uCeU2ZzQ4WBEEoVgol/Bp4Rim1RSm1cqQNlFIrlVKblVKbj0ZTNoyBVYvnDqZR1loTioj7rPpKgmHN0T4/IVMzZYKL0ya7mTLBhcNmk8FcQRDGHQWZuauUmqa1PqiUagKeBb6otW5LtH22Zu5GZ+tu/bALpWCy2zWYRtkbCOEwFPXVTsm3IwjCuKCoZu5qrQ9GXjuUUr8HPgokFP5sEc2nc8l9z0di9YfSN1c6bPQMBFn/FZmVKwjC+Cbvrh6lVLVSyh19D1wFvJX8U9kl6u+PRSZnCYJQLhTCxz8ZeFkptR14Ffiz1np9Pg2I9/fH1tgVBEEY7+Td1aO1bgcW5Pt7Y5H8+YIglDNlM3M3HsmfLwhCuVI2cfyCIAiChQi/IAhCmSHCLwiCUGaI8AuCIJQZZTO4G521u6/Ly0yJ4hEEoYwpix5/NBd/tLpWh8fHt554m9adHYU2TRAEIe+UhfDHFk5Xynp12BSr29oLbZogCELeKQvh39flpdJhG7ZMCqcLglCulIXwS24eQRCEIcpC+CU3jyAIwhBlIfzN85u45+Nn0uR20TMQpMnt4p6PnylRPYIglCVlE84puXkEQRAsyqLHLwiCIAwhwi8IglBmiPALgiCUGSL8giAIZYYIvyAIQpkxbqN6JCmbIAjCyIzLHr8kZRMEQUjMuBR+ScomCIKQmHEp/JKUTRAEITHjUvglKZsgCEJixqXwS1I2QRCExIxL4ZekbIIgCIkZt+GckpRNEARhZMZlj18QBEFIjAi/IAhCmSHCLwiCUGaI8AuCIJQZIvyCIAhlhtJaF9qGUVFKHQX2ZvjxBqAzi+YUEmlL8TKe2jOe2gLjqz3ptuUkrXVj/MKSEP6xoJTarLVeVGg7soG0pXgZT+0ZT22B8dWebLVFXD2CIAhlhgi/IAhCmVEOwr+m0AZkEWlL8TKe2jOe2gLjqz1Zacu49/ELgiAIwymHHr8gCIIQgwi/IAhCmTFuhV8pdY1S6l2l1PtKqbsLbU8mKKX2KKXeVEptU0ptjiybqJR6Vim1K/JaX2g7R0Ip9YBSqkMp9VbMshFtVxY/jpyrN5RSCwtn+YkkaMu3lVIHIudmm1Lquph1/xppy7tKqasLY3VilFIzlVIvKKV2KKXeVkr9Y2R5yZ2fJG0pufOjlHIppV5VSm2PtOXfI8vnKKVeiZyX3ymlKiLLnZG/34+sn53yl2mtx90/wAZ8AMwFKoDtwBmFtiuDduwBGuKWfQ+4O/L+buC+QtuZwPbFwELgrdFsB64DngIUcAHwSqHt
T6Et3wa+OsK2Z0SuNycwJ3Id2grdhjgbpwILI+/dwHsRu0vu/CRpS8mdn8jxrYm8dwCvRI73w8BtkeU/Bf4+8v7zwE8j728Dfpfqd43XHv9Hgfe11u1a6wDwW+CmAtuULW4CfhV5/yvgEwW0JSFa6zbgeNziRLbfBPxaW2wC6pRSU/Nj6egkaEsibgJ+q7X2a613A+9jXY9Fg9b6kNZ6a+S9B9gBTKcEz0+StiSiaM9P5Pj2Rf50RP5p4HJgXWR5/HmJnq91wBVKKZXKd41X4Z8O7Iv5ez/JL4ZiRQPPKKW2KKVWRpZN1lofAuuiB0qp2kwi20v1fP1DxPXxQIzLraTaEnEPnIvVuyzp8xPXFijB86OUsimltgEdwLNYTyTdWutQZJNYewfbElnfA0xK5XvGq/CPdNcrxbjVi7XWC4FrgS8opRYX2qAcUYrn67+Ak4FzgEPAf0aWl0xblFI1wKPAl7XWvck2HWFZUbVphLaU5PnRWpta63OAGVhPIqePtFnkNeO2jFfh3w/MjPl7BnCwQLZkjNb6YOS1A/g91oVwJPqYHXntKJyFaZPI9pI7X1rrI5EfaRj4GUPugpJoi1LKgSWUD2qtH4ssLsnzM1JbSv38aK27gVYsH3+dUipaJjfW3sG2RNZPIEWX5HgV/teAeZHR8AqsgY8nCmxTWiilqpVS7uh74CrgLax2fDay2WeBxwtjYUYksv0J4DOR6JELgJ6oy6FYifNx34x1bsBqy22RiIs5wDzg1Xzbl4yIH/gXwA6t9Q9jVpXc+UnUllI8P0qpRqVUXeR9JbAEa8ziBWBpZLP48xI9X0uB53VkpHdUCj2SncMR8uuwRvg/AL5RaHsysH8uVvTBduDtaBuwfHgbgF2R14mFtjWB/WuxHrGDWD2TOxPZjvXIen/kXL0JLCq0/Sm05TcRW9+I/ACnxmz/jUhb3gWuLbT9I7TnEiyXwBvAtsi/60rx/CRpS8mdH+Bs4PWIzW8B34osn4t1c3ofeARwRpa7In+/H1k/N9XvkpQNgiAIZcZ4dfUIgiAICRDh2b9RKwAAAQRJREFUFwRBKDNE+AVBEMoMEX5BEIQyQ4RfEAShzBDhF4QUUEpNisn0eDgu8+PVcdt+WSn1fwtlqyCMhgi/IKSA1vqY1vocbU2n/ynwo8j7/8KaIBjLbVix/4JQlIjwC8LYWAfcoJRywmCisGnAywW0SRCSIsIvCGNAa30Ma9bkNZFF0bzoMjNSKFpE+AVh7KxlyN0jbh6h6BHhF4Sx8wesIhgLgUodKQwiCMWKCL8gjBFtVU1qBR5AevtCCSDCLwjZYS2wAKvMpyAUNZKdUxAEocyQHr8gCEKZIcIvCIJQZojwC4IglBki/IIgCGWGCL8gCEKZIcIvCIJQZojwC4IglBn/P2k7fxoXRjhZAAAAAElFTkSuQmCC\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.regplot('TV', 'Sales', df, line_kws = {\"color\":\"r\"}, ci=None)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# b) Run OLS (Ordinary Least Square) between TV, Radio, and Newspaper as predictors, and \n", "# Sales as the response. Print the summary of the linear regression result." 
] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.897\n", "Model: OLS Adj. R-squared: 0.896\n", "Method: Least Squares F-statistic: 570.3\n", "Date: Thu, 11 Mar 2021 Prob (F-statistic): 1.58e-96\n", "Time: 15:26:15 Log-Likelihood: -386.18\n", "No. Observations: 200 AIC: 780.4\n", "Df Residuals: 196 BIC: 793.6\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 2.9389 0.312 9.422 0.000 2.324 3.554\n", "TV 0.0458 0.001 32.809 0.000 0.043 0.049\n", "Radio 0.1885 0.009 21.893 0.000 0.172 0.206\n", "Newspaper -0.0010 0.006 -0.177 0.860 -0.013 0.011\n", "==============================================================================\n", "Omnibus: 60.414 Durbin-Watson: 2.084\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 151.241\n", "Skew: -1.327 Prob(JB): 1.44e-33\n", "Kurtosis: 6.332 Cond. No. 454.\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "model = sm.OLS.from_formula('Sales ~ TV + Radio + Newspaper ', df)\n", "result = model.fit()\n", "print(result.summary())" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# c) From the result of the Ordinary Least Square (OLS) method, which advertising method is not \n", "# contributing to Sales? By reviewing the OLS result, Why do you think that method is ineffective \n", "# in terms of its coefficient, t value, and P>|t|?" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Answer: \n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# d) By removing the worst advertising method, run the OLS with the remaining two other advertising \n", "# methods and print summary of the linear regression result." ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.897\n", "Model: OLS Adj. R-squared: 0.896\n", "Method: Least Squares F-statistic: 859.6\n", "Date: Thu, 11 Mar 2021 Prob (F-statistic): 4.83e-98\n", "Time: 15:57:57 Log-Likelihood: -386.20\n", "No. Observations: 200 AIC: 778.4\n", "Df Residuals: 197 BIC: 788.3\n", "Df Model: 2 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 2.9211 0.294 9.919 0.000 2.340 3.502\n", "Radio 0.1880 0.008 23.382 0.000 0.172 0.204\n", "TV 0.0458 0.001 32.909 0.000 0.043 0.048\n", "==============================================================================\n", "Omnibus: 60.022 Durbin-Watson: 2.081\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 148.679\n", "Skew: -1.323 Prob(JB): 5.19e-33\n", "Kurtosis: 6.292 Cond. No. 
425.\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "model = sm.OLS.from_formula('Sales ~ Radio + TV ', df)\n", "result = model.fit()\n", "print(result.summary())" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# e) From previous OLS result, we obtained a very good R-squared score. To get a better R-squared \n", "# score, run the OLS model again by adding variations of predictors such as interaction terms or \n", "# nonlinear terms. Print summary of the linear regression to demonstrate a better result. " ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.968\n", "Model: OLS Adj. R-squared: 0.967\n", "Method: Least Squares F-statistic: 1963.\n", "Date: Thu, 11 Mar 2021 Prob (F-statistic): 6.68e-146\n", "Time: 16:11:57 Log-Likelihood: -270.14\n", "No. Observations: 200 AIC: 548.3\n", "Df Residuals: 196 BIC: 561.5\n", "Df Model: 3 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "Intercept 6.7502 0.248 27.233 0.000 6.261 7.239\n", "Radio 0.0289 0.009 3.241 0.001 0.011 0.046\n", "TV 0.0191 0.002 12.699 0.000 0.016 0.022\n", "Radio:TV 0.0011 5.24e-05 20.727 0.000 0.001 0.001\n", "==============================================================================\n", "Omnibus: 128.132 Durbin-Watson: 2.224\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 1183.719\n", "Skew: -2.323 Prob(JB): 9.09e-258\n", "Kurtosis: 13.975 Cond. No. 
1.80e+04\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 1.8e+04. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n" ] } ], "source": [ "#Interaction terms\n", "model = sm.OLS.from_formula('Sales ~ Radio*TV ', df)\n", "result = model.fit()\n", "print(result.summary())\n" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Sales R-squared: 0.619\n", "Model: OLS Adj. R-squared: 0.615\n", "Method: Least Squares F-statistic: 160.1\n", "Date: Thu, 11 Mar 2021 Prob (F-statistic): 5.21e-42\n", "Time: 16:12:04 Log-Likelihood: -517.18\n", "No. Observations: 200 AIC: 1040.\n", "Df Residuals: 197 BIC: 1050.\n", "Df Model: 2 \n", "Covariance Type: nonrobust \n", "=================================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "---------------------------------------------------------------------------------\n", "Intercept 6.1141 0.659 9.275 0.000 4.814 7.414\n", "TV 0.0673 0.011 6.349 0.000 0.046 0.088\n", "np.square(TV) -6.847e-05 3.56e-05 -1.924 0.056 -0.000 1.69e-06\n", "==============================================================================\n", "Omnibus: 0.681 Durbin-Watson: 1.967\n", "Prob(Omnibus): 0.712 Jarque-Bera (JB): 0.770\n", "Skew: -0.012 Prob(JB): 0.681\n", "Kurtosis: 2.697 Cond. No. 1.11e+05\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "[2] The condition number is large, 1.11e+05. 
This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n" ] } ], "source": [ "# Non-linear terms: add a quadratic TV effect.\n", "# Model and result get their own names so the cell runs on a fresh kernel and does not\n", "# overwrite the interaction-model `model`/`result` fitted above.\n", "model_quad = sm.OLS.from_formula('Sales ~ TV + np.square(TV)', df)\n", "result_quad = model_quad.fit()\n", "print(result_quad.summary())" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# f) From the previous analysis result in e), What percentages of advertising investments would \n", "# result in the best Sales performance between the two advertising methods?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Answer: " ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# 2. Conduct KNN method to classify on the 'AHD' from the Heart.csv data. " ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Run the following commands to import pandas and numpy modules." ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# Run the following commands to read Heart.csv data. 
" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Age</th>\n", " <th>Sex</th>\n", " <th>ChestPain</th>\n", " <th>RestBP</th>\n", " <th>Chol</th>\n", " <th>Fbs</th>\n", " <th>RestECG</th>\n", " <th>MaxHR</th>\n", " <th>ExAng</th>\n", " <th>Oldpeak</th>\n", " <th>Slope</th>\n", " <th>Ca</th>\n", " <th>Thal</th>\n", " <th>AHD</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>63</td>\n", " <td>1</td>\n", " <td>typical</td>\n", " <td>145</td>\n", " <td>233</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>150</td>\n", " <td>0</td>\n", " <td>2.3</td>\n", " <td>3</td>\n", " <td>0.0</td>\n", " <td>fixed</td>\n", " <td>No</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>67</td>\n", " <td>1</td>\n", " <td>asymptomatic</td>\n", " <td>160</td>\n", " <td>286</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>108</td>\n", " <td>1</td>\n", " <td>1.5</td>\n", " <td>2</td>\n", " <td>3.0</td>\n", " <td>normal</td>\n", " <td>Yes</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>67</td>\n", " <td>1</td>\n", " <td>asymptomatic</td>\n", " <td>120</td>\n", " <td>229</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>129</td>\n", " <td>1</td>\n", " <td>2.6</td>\n", " <td>2</td>\n", " <td>2.0</td>\n", " <td>reversable</td>\n", " <td>Yes</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>37</td>\n", " <td>1</td>\n", " <td>nonanginal</td>\n", " <td>130</td>\n", " <td>250</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>187</td>\n", " <td>0</td>\n", " <td>3.5</td>\n", " <td>3</td>\n", " <td>0.0</td>\n", " 
<td>normal</td>\n", " <td>No</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>41</td>\n", " <td>0</td>\n", " <td>nontypical</td>\n", " <td>130</td>\n", " <td>204</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>172</td>\n", " <td>0</td>\n", " <td>1.4</td>\n", " <td>1</td>\n", " <td>0.0</td>\n", " <td>normal</td>\n", " <td>No</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Age Sex ChestPain RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak \\\n", "1 63 1 typical 145 233 1 2 150 0 2.3 \n", "2 67 1 asymptomatic 160 286 0 2 108 1 1.5 \n", "3 67 1 asymptomatic 120 229 0 2 129 1 2.6 \n", "4 37 1 nonanginal 130 250 0 0 187 0 3.5 \n", "5 41 0 nontypical 130 204 0 2 172 0 1.4 \n", "\n", " Slope Ca Thal AHD \n", "1 3 0.0 fixed No \n", "2 2 3.0 normal Yes \n", "3 2 2.0 reversable Yes \n", "4 3 0.0 normal No \n", "5 1 0.0 normal No " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.read_csv('Heart.csv', index_col=0).dropna() \n", "# dropna() removes the data rows that contain NaN values. \n", "df2.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# a) Find the data shape of the df2 data frame." ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(297, 14)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2.shape" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Import necessary packages to run KNN method." 
] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "from sklearn import neighbors\n", "from sklearn.metrics import confusion_matrix, classification_report\n", "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis\n", "from sklearn.metrics import confusion_matrix, classification_report, precision_score" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# b) Set X data frame by droping the columns AHD, ChestPain, and Thal from df2.\n", "# Set y responses with Acute Heart Disease (AHD) and convert y into arrays. \n", "# Display top rows of X data frame." ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Age</th>\n", " <th>Sex</th>\n", " <th>RestBP</th>\n", " <th>Chol</th>\n", " <th>Fbs</th>\n", " <th>RestECG</th>\n", " <th>MaxHR</th>\n", " <th>ExAng</th>\n", " <th>Oldpeak</th>\n", " <th>Slope</th>\n", " <th>Ca</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>1</td>\n", " <td>63</td>\n", " <td>1</td>\n", " <td>145</td>\n", " <td>233</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>150</td>\n", " <td>0</td>\n", " <td>2.3</td>\n", " <td>3</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <td>2</td>\n", " <td>67</td>\n", " <td>1</td>\n", " <td>160</td>\n", " <td>286</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>108</td>\n", " <td>1</td>\n", " <td>1.5</td>\n", " <td>2</td>\n", " <td>3.0</td>\n", " </tr>\n", " <tr>\n", " <td>3</td>\n", " <td>67</td>\n", " <td>1</td>\n", " 
<td>120</td>\n", " <td>229</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>129</td>\n", " <td>1</td>\n", " <td>2.6</td>\n", " <td>2</td>\n", " <td>2.0</td>\n", " </tr>\n", " <tr>\n", " <td>4</td>\n", " <td>37</td>\n", " <td>1</td>\n", " <td>130</td>\n", " <td>250</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>187</td>\n", " <td>0</td>\n", " <td>3.5</td>\n", " <td>3</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <td>5</td>\n", " <td>41</td>\n", " <td>0</td>\n", " <td>130</td>\n", " <td>204</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>172</td>\n", " <td>0</td>\n", " <td>1.4</td>\n", " <td>1</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Age Sex RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak Slope Ca\n", "1 63 1 145 233 1 2 150 0 2.3 3 0.0\n", "2 67 1 160 286 0 2 108 1 1.5 2 3.0\n", "3 67 1 120 229 0 2 129 1 2.6 2 2.0\n", "4 37 1 130 250 0 0 187 0 3.5 3 0.0\n", "5 41 0 130 204 0 2 172 0 1.4 1 0.0" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X = df2.drop(['AHD','ChestPain','Thal'], axis=1)\n", "y= df2.AHD\n", "X.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# c) Set X_train and y_train from the 100th row to the last row of X and y data. \n", "# Display top rows of X_train." 
] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Age</th>\n", " <th>Sex</th>\n", " <th>RestBP</th>\n", " <th>Chol</th>\n", " <th>Fbs</th>\n", " <th>RestECG</th>\n", " <th>MaxHR</th>\n", " <th>ExAng</th>\n", " <th>Oldpeak</th>\n", " <th>Slope</th>\n", " <th>Ca</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <td>102</td>\n", " <td>34</td>\n", " <td>1</td>\n", " <td>118</td>\n", " <td>182</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>174</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>1</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <td>103</td>\n", " <td>57</td>\n", " <td>0</td>\n", " <td>128</td>\n", " <td>303</td>\n", " <td>0</td>\n", " <td>2</td>\n", " <td>159</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>1</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <td>104</td>\n", " <td>71</td>\n", " <td>0</td>\n", " <td>110</td>\n", " <td>265</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>130</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>1</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <td>105</td>\n", " <td>49</td>\n", " <td>1</td>\n", " <td>120</td>\n", " <td>188</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>139</td>\n", " <td>0</td>\n", " <td>2.0</td>\n", " <td>2</td>\n", " <td>3.0</td>\n", " </tr>\n", " <tr>\n", " <td>106</td>\n", " <td>54</td>\n", " <td>1</td>\n", " <td>108</td>\n", " <td>309</td>\n", " <td>0</td>\n", " <td>0</td>\n", " <td>156</td>\n", " <td>0</td>\n", " <td>0.0</td>\n", " <td>1</td>\n", " <td>0.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], 
"text/plain": [ " Age Sex RestBP Chol Fbs RestECG MaxHR ExAng Oldpeak Slope Ca\n", "102 34 1 118 182 0 2 174 0 0.0 1 0.0\n", "103 57 0 128 303 0 2 159 0 0.0 1 1.0\n", "104 71 0 110 265 1 2 130 0 0.0 1 1.0\n", "105 49 1 120 188 0 0 139 0 2.0 2 3.0\n", "106 54 1 108 309 0 0 156 0 0.0 1 0.0" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train = X[100:]\n", "y_train = y[100:]\n", "X_train.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# Set X_test and y_test to the first 100 rows of X and y data. \n", "# Display top rows of y_test." ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1 No\n", "2 Yes\n", "3 Yes\n", "4 No\n", "5 No\n", "Name: AHD, dtype: object" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Hold out the first 100 rows as the test set (rows 100 onward are the training set).\n", "X_test = X[:100]\n", "y_test = y[:100]\n", "# Show the held-out responses themselves, not the full response vector.\n", "y_test.head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# d) Run the KNN method with k = 1 to 5 to fit the KNN model with X_train and y_train, and predict \n", "# X_test. Print confusion_matrix and classification_report for each k iteration." 
] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " precision recall f1-score support\n", "\n", " No 0.660 0.579 0.617 57\n", " Yes 0.520 0.605 0.559 43\n", "\n", " accuracy 0.590 100\n", " macro avg 0.590 0.592 0.588 100\n", "weighted avg 0.600 0.590 0.592 100\n", "\n", "[[33 24]\n", " [17 26]]\n", " precision recall f1-score support\n", "\n", " No 0.629 0.772 0.693 57\n", " Yes 0.567 0.395 0.466 43\n", "\n", " accuracy 0.610 100\n", " macro avg 0.598 0.584 0.579 100\n", "weighted avg 0.602 0.610 0.595 100\n", "\n", "[[44 13]\n", " [26 17]]\n", " precision recall f1-score support\n", "\n", " No 0.673 0.649 0.661 57\n", " Yes 0.556 0.581 0.568 43\n", "\n", " accuracy 0.620 100\n", " macro avg 0.614 0.615 0.614 100\n", "weighted avg 0.622 0.620 0.621 100\n", "\n", "[[37 20]\n", " [18 25]]\n", " precision recall f1-score support\n", "\n", " No 0.667 0.842 0.744 57\n", " Yes 0.679 0.442 0.535 43\n", "\n", " accuracy 0.670 100\n", " macro avg 0.673 0.642 0.640 100\n", "weighted avg 0.672 0.670 0.654 100\n", "\n", "[[48 9]\n", " [24 19]]\n", " precision recall f1-score support\n", "\n", " No 0.684 0.684 0.684 57\n", " Yes 0.581 0.581 0.581 43\n", "\n", " accuracy 0.640 100\n", " macro avg 0.633 0.633 0.633 100\n", "weighted avg 0.640 0.640 0.640 100\n", "\n", "[[39 18]\n", " [18 25]]\n" ] } ], "source": [ "# Evaluate KNN for k = 1..5: one report and one confusion matrix per k, in increasing k order.\n", "for k in range(1, 6):\n", "    knn = neighbors.KNeighborsClassifier(n_neighbors=k)\n", "    # Fit on the training rows, then label the held-out rows.\n", "    knn.fit(X_train, y_train)\n", "    pred = knn.predict(X_test)\n", "\n", "    print(classification_report(y_test, pred, digits=3))\n", "    print(confusion_matrix(y_test, pred))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# e) Identifying AHD is the most important to save lives. Which measure between Precision, Recall,\n", "# and overall Accuracy do you use for the best AHD prediction? Explain your reasoning. 
\n", "# Based upon your reasoning, which KNN k value produces the best accuracy in identifying the\n", "# true AHD occurrences? " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Answer: " ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# 3. Conduct Logistic Regression to classify on the 'AHD' in the Heart.csv data. " ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Import statsmodels.api and statsmodels.formula.api." ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "# From #2 data frame df2, set the first 200 'Age', 'RestBP', 'Chol', 'MaxHR', 'Oldpeak', 'Slope', \n", "# and 'AHD' data as training data, X_train, and the rest as test data, X_test. (Note that 'AHD'\n", "# is added this time.)\n", "# Set the first 200 ‘AHD’ data as training responses, y_train, and the rest as test responses, \n", "# y_test. (y_train and y_test need to convert into arrays with values.ravel().)" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "# Keep Age, RestBP, Chol, MaxHR, Oldpeak, Slope and AHD; AHD stays in X so the formula API can read it.\n", "X = df2.drop(['Sex','ChestPain','Fbs','RestECG','ExAng','Ca','Thal'], axis=1)\n", "y= df2.AHD" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [], "source": [ "X_train = X[:200]\n", "X_test = X[200:]" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "# Convert the responses to plain NumPy arrays via .values (Series.ravel() is deprecated in recent pandas).\n", "y_train = y[:200].values.ravel()\n", "y_test = y[200:].values.ravel()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "# Using the Logistic Regression method, fit the GLM model with the X_train data and \n", "# store the outcome to the \"result\" variable. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Logistic regression (Binomial GLM) of AHD on the six numeric predictors kept in X_train.\n", "# NOTE(review): AHD holds the strings 'No'/'Yes'; the formula interface presumably dummy-codes both\n", "# levels and models the first one, so fitted values would be P(AHD == 'No') -- consistent with the\n", "# '< 0.5 means Yes' rule in the next step. Confirm after re-running this cell.\n", "formula = 'AHD ~ Age + RestBP + Chol + MaxHR + Oldpeak + Slope'\n", "model = smf.glm(formula = formula, data = X_train, family = sm.families.Binomial())\n", "result = model.fit()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "# Predict X_test data and store the result to \"predictions\" variable.\n", "# Generate predictions_nominal to classify \"Yes\" if prediction of X_test < 0.5 and “No” otherwise.\n", "# Print confusion_matrix between y_test and predictions_nominal. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Print classification_report between the y_test and predictions_nominal. " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# If we compare KNN and Logistic Regression for the same Heart.csv data, which method is better?\n", "# Why do you think it is better?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Answer: " ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# 4. Conduct Lab04B LDA-QDA methods to classify AHD status using the Heart.csv data. " ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# From df2, set the first 200 'Age', 'RestBP', 'Chol', 'MaxHR', 'Oldpeak', and 'Slope' data as \n", "# training data, X_train, and the rest as test data, X_test. Set the first 200 ‘AHD’ data as \n", "# training responses, y_train, and the rest as test responses, y_test. \n", "# (y_train and y_test need to be converted into arrays with values.ravel().)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "# Import LinearDiscriminantAnalysis and QuadraticDiscriminantAnalysis from \n", "# sklearn.discriminant_analysis, and confusion_matrix, classification_report, and precision_score \n", "# from sklearn.metrics." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# Fit the LDA model with X_train and y_train data and predict X_test from the model." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# Print confusion_matrix and classification_report for the y_test and predictions of X_test." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# Fit the QDA model with X_train and y_train data and predict X_test from the model." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# Print confusion_matrix and classification_report for the y_test and predictions of X_test." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# For the same Heart data set with the same predictor variables, which analysis tool is working best\n", "# among KNN, Logistic Regression, LDA, and QDA? What might be the reason for the best performance\n", "# by this method?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Answer: \n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "# For the same data set with the same predictor variables, which analysis tool is working worst\n", "# among KNN, Logistic Regression, LDA, and QDA? What might be the reason for the worst performance\n", "# by this method?" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Answer: \n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }