data science
vis_matplotlib
October 25, 2022
[1]: # data visualization
[2]: import numpy as np
[3]: weather = np.genfromtxt('weather_madrid.csv', delimiter=',', skip_header=True, filling_values=0.0)
[4]: weather
[4]: array([[1.997e+03, 1.000e+00, 1.000e+00, …, 6.000e+00, 0.000e+00, 2.290e+02],
[1.997e+03, 1.000e+00, 2.000e+00, …, 5.000e+00, 0.000e+00, 1.430e+02],
[1.997e+03, 1.000e+00, 3.000e+00, …, 6.000e+00, 0.000e+00, 2.560e+02],
…, [2.015e+03, 1.200e+01, 2.900e+01, …, 5.000e+00, 0.000e+00, 1.900e+02],
[2.015e+03, 1.200e+01, 3.000e+01, …, 6.000e+00, 0.000e+00, 2.560e+02],
[2.015e+03, 1.200e+01, 3.100e+01, …, 6.000e+00, 0.000e+00, 3.130e+02]])
[5]: np.set_printoptions(suppress=True)
[6]: weather
[6]: array([[1997., 1., 1., …, 6., 0., 229.], [1997., 1., 2., …, 5., 0., 143.], [1997., 1., 3., …, 6., 0., 256.], …, [2015., 12., 29., …, 5., 0., 190.], [2015., 12., 30., …, 6., 0., 256.], [2015., 12., 31., …, 6., 0., 313.]])
[7]: weather[1, :] # row at index 1 (the second row; indexing is zero-based)
1
[7]: array([1997., 1., 2., 7., 3., 0., 6., 3., 0., 100., 92., 71., 1007., 1003., 997., 10., 9., 4., 26., 8., 47., 0., 5., 0., 143.])
[8]: import matplotlib.pyplot as plt
[9]: # Plot 1: Plot of monthly average temperatures.
[10]: month = weather[:, 1]
[11]: # step1: find average temperature for january
[12]: np.average(weather[month == 1, 4]) # since column 4 has the mean temperatures.
[12]: 5.688729874776387
[13]: # step 2: find the average temperature for all months, store them in array Y.
[14]: for x in range(1, 13): # 1 is included in the range, 13 is excluded.
          print(x)
1 2 3 4 5 6 7 8 9 10 11 12
[15]: X = [x for x in range(1, 13)] X
[15]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
[16]: Y = [np.average(weather[month == x, 4]) for x in X] Y
[16]: [5.688729874776387, 6.947069943289225, 10.14874551971326, 12.85925925925926, 16.667235494880547,
2
22.037366548042705, 25.074702886247877, 24.742556917688265, 20.49473684210526, 15.152801358234296, 9.196491228070176, 5.859083191850594]
[17]: # step 3: create a plot
[18]: plt.plot(X, Y)
[18]: [<matplotlib.lines.Line2D at 0x125d1dbbe80>]
[19]: plt.bar(X, Y)
[19]: <BarContainer object of 12 artists>
3
[20]: # Plot 2: Monthly minimum (blue), mean (green), and maximum (red) recorded temperatures.
[21]: X = [x for x in range(1, 13)] X
[21]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
[22]: Y_min = [np.min(weather[month == x, 5]) for x in X] Y_mean = [np.average(weather[month == x, 4]) for x in X] Y_max = [np.max(weather[month == x, 3]) for x in X]
[23]: plt.plot(X, Y_min, 'b') plt.plot(X, Y_mean, 'g') plt.plot(X, Y_max, 'r')
[23]: [<matplotlib.lines.Line2D at 0x125d27a1190>]
4
[24]: Y_min = [np.average(weather[month == x, 5]) for x in X] Y_mean = [np.average(weather[month == x, 4]) for x in X] Y_max = [np.average(weather[month == x, 3]) for x in X]
[25]: plt.plot(X, Y_min, 'b') plt.plot(X, Y_mean, 'g') plt.plot(X, Y_max, 'r')
[25]: [<matplotlib.lines.Line2D at 0x125d27fbe80>]
5
[26]: # Plot 3: Average yearly temperatures
[27]: # for the months:
      # month = weather[:, 1]
      # X = [x for x in range(1, 13)]
      # Y = [np.average(weather[month == x, 4]) for x in X]
[28]: # for the years?
      year = weather[:, 0]
      # X = [x for x in range(1997, 2016)]
      X = np.unique(year)
      X.sort()
      Y = [np.average(weather[year == x, 4]) for x in X]
[29]: plt.bar(X, Y)
[29]: <BarContainer object of 19 artists>
6
[30]: # Plot 4: Histogram of number of data points per temperature interval
      # visualize the distribution of the data
[31]: plt.hist(weather[:, 4], bins=5, ec='black')
[31]: (array([ 414., 2013., 1825., 1808., 752.]), array([-3., 4., 11., 18., 25., 32.]), <BarContainer object of 5 artists>)
7
[32]: # for example, we have about 2000 temperatures between 4 and 11 degrees (second bar).
[ ]:
8