data science
data_selection
October 19, 2022
[1]: # Data Selection
[2]: import numpy as np
[3]: # This is weather data recorded in Memphis during summer (June to September).
     # Column 0: month
     # Column 1: temperature in Fahrenheit
     # Column 2: precipitation in inches
data = np.array([ [6, 70, 3], [7, 75, 3], [6, 85, 4], [7, 90, 4], [7, 91, 5], [8, 85, 2], [8, 87, 4], [6, 83, 5], [8, 77, 3], [6, 69, 6], [9, 68, 1], [6, 80, 6], [9, 65, 3], [9, 75, 4], [9, 80, 5]])
[4]: data.shape
[4]: (15, 3)
[5]: # Select the data for the row 0:
     data[0, :]
     # row_selection: 0
     # column_selection: all
[5]: array([ 6, 70, 3])
1
[6]: # Select the data of column 2:
     data[:, 2]
     # row_selection: all
     # column_selection: 2
[6]: array([3, 3, 4, 4, 5, 2, 4, 5, 3, 6, 1, 6, 3, 4, 5])
[7]: # Get the data for the first five rows. data[0:5, :]
[7]: array([[ 6, 70, 3], [ 7, 75, 3], [ 6, 85, 4], [ 7, 90, 4], [ 7, 91, 5]])
[8]: # Get the data for the first five rows, # and the first two columns. data[0:5, 0:2]
[8]: array([[ 6, 70], [ 7, 75], [ 6, 85], [ 7, 90], [ 7, 91]])
[9]: # Get the data for the last two columns, # and the first five rows. data[0:5, 1:3]
[9]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])
[10]: # or can be written as data[:5, 1:]
[10]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])
[11]: # or can be written as data[:5, -2:]
2
[11]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])
[12]: # Get the last 4 rows data[-4:, :]
[12]: array([[ 6, 80, 6], [ 9, 65, 3], [ 9, 75, 4], [ 9, 80, 5]])
[13]: # Find the temperature values, and store them in a variable
      temp = data[:, 1]
[14]: temp
[14]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 68, 80, 65, 75, 80])
[15]: # Find the month values, and store them in a variable
      month = data[:, 0]
[16]: month
[16]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9])
[17]: # Find the maximum temperature np.max(temp)
[17]: 91
[18]: # Find the index (or position) of the maximum temperature np.argmax(temp)
[18]: 4
[19]: # Find the month that corresponds to the maximum temperature data[np.argmax(temp), 0]
[19]: 7
[20]: m = np.argmax(temp)
      data[m, 0]
[20]: 7
3
[21]: # boolean selection
[22]: # Find all the temperatures below 70 degrees data[temp < 70, 1]
[22]: array([69, 68, 65])
[23]: # Find the months with temperatures below 70 degrees data[temp < 70, 0]
[23]: array([6, 9, 9])
[24]: np.unique(data[temp < 70, 0])
[24]: array([6, 9])
[25]: # Find all the temperatures for the month of August data[month == 8, 1]
[25]: array([85, 87, 77])
[26]: # Find the average temperature for August np.average(data[month == 8, 1])
[26]: 83.0
[27]: # Find the temperatures above 80 for June
      data[(month == 6) & (temp > 80), 1]  # & means and
[27]: array([85, 83])
[28]: # Find the temperatures for the months of June, July, and August data[month != 9, 1]
[28]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])
[29]: data[(month == 6) | (month == 7) | (month == 8), 1]
[29]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])
[30]: data[(month >= 6) & (month <= 8), 1]
[30]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])
[31]: # Print the average temperature for each month:
      for x in [6, 7, 8, 9]:
          print(np.average(data[month == x, 1]))
4
77.4 85.33333333333333 83.0 72.0
[32]: # Find the average temperature for each month, and store it in a list:
      [np.average(data[month == x, 1]) for x in [6, 7, 8, 9]]
[32]: [77.4, 85.33333333333333, 83.0, 72.0]
[33]: month
[33]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9])
[34]: # Display the data set sorted by month
      data[month.argsort()]
[34]: array([[ 6, 70, 3], [ 6, 85, 4], [ 6, 83, 5], [ 6, 69, 6], [ 6, 80, 6], [ 7, 75, 3], [ 7, 90, 4], [ 7, 91, 5], [ 8, 85, 2], [ 8, 87, 4], [ 8, 77, 3], [ 9, 68, 1], [ 9, 65, 3], [ 9, 75, 4], [ 9, 80, 5]])
[35]: month.argsort?
Docstring: a.argsort(axis=-1, kind=None, order=None)
Returns the indices that would sort this array.
Refer to `numpy.argsort` for full documentation.
See Also -------- numpy.argsort : equivalent function Type: builtin_function_or_method
5
[ ]:
6