data science

profileRoyal9
vis_matplotlib.pdf

vis_matplotlib

October 25, 2022

[1]: # data visualization

[2]: import numpy as np

[3]: weather = np.genfromtxt('weather_madrid.csv', delimiter=',', skip_header=True, filling_values=0.0)

[4]: weather

[4]: array([[1.997e+03, 1.000e+00, 1.000e+00, …, 6.000e+00, 0.000e+00, 2.290e+02],

[1.997e+03, 1.000e+00, 2.000e+00, …, 5.000e+00, 0.000e+00, 1.430e+02],

[1.997e+03, 1.000e+00, 3.000e+00, …, 6.000e+00, 0.000e+00, 2.560e+02],

…, [2.015e+03, 1.200e+01, 2.900e+01, …, 5.000e+00, 0.000e+00, 1.900e+02],

[2.015e+03, 1.200e+01, 3.000e+01, …, 6.000e+00, 0.000e+00, 2.560e+02],

[2.015e+03, 1.200e+01, 3.100e+01, …, 6.000e+00, 0.000e+00, 3.130e+02]])

[5]: np.set_printoptions(suppress=True)

[6]: weather

[6]: array([[1997., 1., 1., …, 6., 0., 229.], [1997., 1., 2., …, 5., 0., 143.], [1997., 1., 3., …, 6., 0., 256.], …, [2015., 12., 29., …, 5., 0., 190.], [2015., 12., 30., …, 6., 0., 256.], [2015., 12., 31., …, 6., 0., 313.]])

[7]: weather[1, :] # row1

1

[7]: array([1997., 1., 2., 7., 3., 0., 6., 3., 0., 100., 92., 71., 1007., 1003., 997., 10., 9., 4., 26., 8., 47., 0., 5., 0., 143.])

[8]: import matplotlib.pyplot as plt

[9]: # Plot 1: Plot of monthly average temperatures.

[10]: month = weather[:, 1]

[11]: # step1: find average temperature for january

[12]: np.average(weather[month == 1, 4]) # since column 4 has the mean temperatures.

[12]: 5.688729874776387

[13]: # step 2: find the average temperature for all months, store them in array Y.

[14]: for x in range(1, 13): # 1 is included in the range, 13 is excluded.
          print(x)

1 2 3 4 5 6 7 8 9 10 11 12

[15]: X = [x for x in range(1, 13)]
      X

[15]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

[16]: Y = [np.average(weather[month == x, 4]) for x in X]
      Y

[16]: [5.688729874776387, 6.947069943289225, 10.14874551971326, 12.85925925925926, 16.667235494880547, 22.037366548042705, 25.074702886247877, 24.742556917688265, 20.49473684210526, 15.152801358234296, 9.196491228070176, 5.859083191850594]

2

[17]: # step 3: create a plot

[18]: plt.plot(X, Y)

[18]: [<matplotlib.lines.Line2D at 0x125d1dbbe80>]

[19]: plt.bar(X, Y)

[19]: <BarContainer object of 12 artists>

3

[20]: # Plot 2: Monthly minimum (blue), mean (green), and maximum (red) recorded temperatures.

[21]: X = [x for x in range(1, 13)]
      X

[21]: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

[22]: Y_min = [np.min(weather[month == x, 5]) for x in X]
      Y_mean = [np.average(weather[month == x, 4]) for x in X]
      Y_max = [np.max(weather[month == x, 3]) for x in X]

[23]: plt.plot(X, Y_min, 'b')
      plt.plot(X, Y_mean, 'g')
      plt.plot(X, Y_max, 'r')

[23]: [<matplotlib.lines.Line2D at 0x125d27a1190>]

4

[24]: Y_min = [np.average(weather[month == x, 5]) for x in X]
      Y_mean = [np.average(weather[month == x, 4]) for x in X]
      Y_max = [np.average(weather[month == x, 3]) for x in X]

[25]: plt.plot(X, Y_min, 'b')
      plt.plot(X, Y_mean, 'g')
      plt.plot(X, Y_max, 'r')

[25]: [<matplotlib.lines.Line2D at 0x125d27fbe80>]

5

[26]: # Plot 3: Average yearly temperatures

[27]: # for the months:
      # month = weather[:, 1]
      # X = [x for x in range(1, 13)]
      # Y = [np.average(weather[month == x, 4]) for x in X]

[28]: # for the years?
      year = weather[:, 0]
      # X = [x for x in range(1997, 2016)]
      X = np.unique(year)
      X.sort()
      Y = [np.average(weather[year == x, 4]) for x in X]

[29]: plt.bar(X, Y)

[29]: <BarContainer object of 19 artists>

6

[30]: # Plot 4: Histogram of number of data points per temperature interval
      # visualize the distribution of the data

[31]: plt.hist(weather[:, 4], bins=5, ec='black')

[31]: (array([ 414., 2013., 1825., 1808., 752.]), array([-3., 4., 11., 18., 25., 32.]), <BarContainer object of 5 artists>)

7

[32]: # for example, we have about 2000 temperatures between 4 and 11 degrees (second bar).

[ ]:

8