python modeling

nieyanan
Part1.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:100% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Imports: standard library first, then third-party packages.\n",
    "import datetime\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "# `display` and `HTML` belong to the public IPython.display API; importing\n",
    "# them from IPython.core.display is deprecated.\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "# Stretch the notebook container to the full browser width.\n",
    "display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n",
    "\n",
    "# Print arrays without scientific notation, with 200-character lines and\n",
    "# up to 100 items shown at each edge before summarising.\n",
    "np.set_printoptions(suppress=True, linewidth=200, edgeitems=100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset; the path is relative to the notebook's working directory.\n",
    "wish_df = pd.read_csv(\"summer product sales.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace missing values in the per-star rating count columns with 0.\n",
    "rating_count_columns = [\n",
    "    'rating_five_count',\n",
    "    'rating_four_count',\n",
    "    'rating_three_count',\n",
    "    'rating_two_count',\n",
    "    'rating_one_count',\n",
    "]\n",
    "wish_df[rating_count_columns] = wish_df[rating_count_columns].fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "0\n"
     ]
    }
   ],
   "source": [
    "# Remove duplicates: first rows identical in every column, then rows that\n",
    "# share a product_id. Both passes keep the last occurrence.\n",
    "wish_df = (\n",
    "    wish_df\n",
    "    .drop_duplicates(keep='last')\n",
    "    .drop_duplicates(subset=['product_id'], keep='last')\n",
    ")\n",
    "\n",
    "# Both counts are expected to be 0 after de-duplication.\n",
    "print(wish_df.duplicated().sum())\n",
    "print(wish_df.duplicated('product_id').sum())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "title object\n",
       "price float64\n",
       "retail_price int64\n",
       "currency object\n",
       "units_sold int64\n",
       "uses_ad_boosts int64\n",
       "rating float64\n",
       "rating_count int64\n",
       "rating_five_count float64\n",
       "rating_four_count float64\n",
       "rating_three_count float64\n",
       "rating_two_count float64\n",
       "rating_one_count float64\n",
       "badges_count int64\n",
       "badge_local_product int64\n",
       "badge_product_quality int64\n",
       "badge_fast_shipping int64\n",
       "tags object\n",
       "product_color object\n",
       "product_variation_size_id object\n",
       "product_variation_inventory int64\n",
       "shipping_option_name object\n",
       "shipping_option_price int64\n",
       "shipping_is_express int64\n",
       "countries_shipped_to int64\n",
       "inventory_total int64\n",
       "has_urgency_banner float64\n",
       "urgency_text object\n",
       "origin_country object\n",
       "merchant_title object\n",
       "merchant_name object\n",
       "merchant_info_subtitle object\n",
       "merchant_rating_count int64\n",
       "merchant_rating float64\n",
       "merchant_id object\n",
       "merchant_has_profile_picture int64\n",
       "product_url object\n",
       "product_picture object\n",
       "product_id object\n",
       "theme object\n",
       "dtype: object"
      ]
     },
     "execution_count": 54,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lower outlier boundary: mean minus OUTLIER_N_STD standard deviations.\n",
    "# NOTE(review): the original line read titanic_df['Age'], but this notebook\n",
    "# never defines titanic_df and the dataset has no 'Age' column, so the cell\n",
    "# raised NameError on a fresh kernel. 'price' is an assumed stand-in column;\n",
    "# confirm which column the outlier filter should target.\n",
    "OUTLIER_COLUMN = 'price'\n",
    "OUTLIER_N_STD = 2\n",
    "\n",
    "low_boundary = wish_df[OUTLIER_COLUMN].mean() - OUTLIER_N_STD * wish_df[OUTLIER_COLUMN].std()\n",
    "# TODO: no rows are filtered yet; only the lower boundary is computed.\n",
    "\n",
    "wish_df.dtypes"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}