SQLAlchemy- Climate Analysis and Exploration

annahiraeta
climate_starter.ipynb

{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from matplotlib import style\n", "style.use('fivethirtyeight')\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import datetime as dt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Reflect Tables into SQLAlchemy ORM" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Python SQL toolkit and Object Relational Mapper\n", "import sqlalchemy\n", "from sqlalchemy.ext.automap import automap_base\n", "from sqlalchemy.orm import Session\n", "from sqlalchemy import create_engine, func" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "engine = create_engine(\"sqlite:///Resources/hawaii.sqlite\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# reflect an existing database into a new model\n", "\n", "# reflect the tables" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# We can view all of the classes that automap found\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Save references to each table\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create our session (link) from Python to the DB\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploratory Climate Analysis" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Design a query to retrieve the last 12 months of precipitation data and plot the results\n", "\n", "# Calculate the date 1 year ago from the last data point in the database\n", "\n", "# Perform a query to retrieve the data and precipitation scores\n", "\n", "# Save the query results as a Pandas DataFrame and set the index to the date column\n", "\n", "# Sort the dataframe by date\n", "\n", "# Use Pandas Plotting with Matplotlib to plot the data\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Use Pandas to calcualte the summary statistics for the precipitation data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Design a query to show how many stations are available in this dataset?\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# What are the most active stations? (i.e. what stations have the most rows)?\n", "# List the stations and the counts in descending order.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Using the station id from the previous query, calculate the lowest temperature recorded, \n", "# highest temperature recorded, and average temperature of the most active station?\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Choose the station with the highest number of temperature observations.\n", "# Query the last 12 months of temperature observation data for this station and plot the results as a histogram\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bonus Challenge Assignment" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This function called `calc_temps` will accept start date and end date in the format '%Y-%m-%d' \n", "# and return the minimum, average, and maximum temperatures for that range of dates\n", "def calc_temps(start_date, end_date):\n", " \"\"\"TMIN, TAVG, and TMAX for a list of dates.\n", " \n", " Args:\n", " start_date (string): A date string in the format %Y-%m-%d\n", " end_date (string): A date string in the format %Y-%m-%d\n", " \n", " Returns:\n", " TMIN, TAVE, and TMAX\n", " \"\"\"\n", " \n", " return session.query(func.min(Measurement.tobs), func.avg(Measurement.tobs), func.max(Measurement.tobs)).\\\n", " filter(Measurement.date >= start_date).filter(Measurement.date <= end_date).all()\n", "\n", "# function usage example\n", "print(calc_temps('2012-02-28', '2012-03-05'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Use your previous function `calc_temps` to calculate the tmin, tavg, and tmax \n", "# for your trip using the previous year's data for those same dates.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Plot the results from your previous query as a bar chart. \n", "# Use \"Trip Avg Temp\" as your Title\n", "# Use the average temperature for the y value\n", "# Use the peak-to-peak (tmax-tmin) value as the y error bar (yerr)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Calculate the total amount of rainfall per weather station for your trip dates using the previous year's matching dates.\n", "# Sort this in descending order by precipitation amount and list the station, name, latitude, longitude, and elevation\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create a query that will calculate the daily normals \n", "# (i.e. the averages for tmin, tmax, and tavg for all historic data matching a specific month and day)\n", "\n", "def daily_normals(date):\n", " \"\"\"Daily Normals.\n", " \n", " Args:\n", " date (str): A date string in the format '%m-%d'\n", " \n", " Returns:\n", " A list of tuples containing the daily normals, tmin, tavg, and tmax\n", " \n", " \"\"\"\n", " \n", " sel = [func.min(Measurement.tobs), func.avg(Measurement.tobs), func.max(Measurement.tobs)]\n", " return session.query(*sel).filter(func.strftime(\"%m-%d\", Measurement.date) == date).all()\n", " \n", "daily_normals(\"01-01\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# calculate the daily normals for your trip\n", "# push each tuple of calculations into a list called `normals`\n", "\n", "# Set the start and end date of the trip\n", "\n", "# Use the start and end date to create a range of dates\n", "\n", "# Stip off the year and save a list of %m-%d strings\n", "\n", "# Loop through the list of %m-%d strings and calculate the normals for each date\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Load the previous query results into a Pandas DataFrame and add the `trip_dates` range as the `date` index\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Plot the daily normals as an area plot with `stacked=False`\n" ] } ], "metadata": { "kernel_info": { "name": "python3" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" }, "nteract": { "version": "0.12.3" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }