data science

profileRoyal9
01_data_selection.pdf

data_selection

October 19, 2022

[1]: # Data Selection

[2]: import numpy as np

[3]: # This is weather data recorded in Memphis during summer (June to September).
# Column 0: month
# Column 1: temperature in Fahrenheit
# Column 2: precipitation in inches

data = np.array([ [6, 70, 3], [7, 75, 3], [6, 85, 4], [7, 90, 4], [7, 91, 5], [8, 85, 2], [8, 87, 4], [6, 83, 5], [8, 77, 3], [6, 69, 6], [9, 68, 1], [6, 80, 6], [9, 65, 3], [9, 75, 4], [9, 80, 5]])

[4]: data.shape

[4]: (15, 3)

[5]: # Select the data for row 0:
data[0, :]
# row_selection: 0
# column_selection: all

[5]: array([ 6, 70, 3])

1

[6]: # Select the data of column 2: data[:, 2] # row_selection: all # column_selection: 2

[6]: array([3, 3, 4, 4, 5, 2, 4, 5, 3, 6, 1, 6, 3, 4, 5])

[7]: # Get the data for the first five rows. data[0:5, :]

[7]: array([[ 6, 70, 3], [ 7, 75, 3], [ 6, 85, 4], [ 7, 90, 4], [ 7, 91, 5]])

[8]: # Get the data for the first five rows, # and the first two columns. data[0:5, 0:2]

[8]: array([[ 6, 70], [ 7, 75], [ 6, 85], [ 7, 90], [ 7, 91]])

[9]: # Get the data for the last two columns, # and the first five rows. data[0:5, 1:3]

[9]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])

[10]: # or can be written as data[:5, 1:]

[10]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])

[11]: # or can be written as data[:5, -2:]

2

[11]: array([[70, 3], [75, 3], [85, 4], [90, 4], [91, 5]])

[12]: # Get the last 4 rows data[-4:, :]

[12]: array([[ 6, 80, 6], [ 9, 65, 3], [ 9, 75, 4], [ 9, 80, 5]])

[13]: # Find the temperature values, and store them in a variable temp = data[:, 1]

[14]: temp

[14]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 68, 80, 65, 75, 80])

[15]: # Find the month values, and store them in a variable month = data[:, 0]

[16]: month

[16]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9])

[17]: # Find the maximum temperature np.max(temp)

[17]: 91

[18]: # Find the index (or position) of the maximum temperature np.argmax(temp)

[18]: 4

[19]: # Find the month that corresponds to the maximum temperature data[np.argmax(temp), 0]

[19]: 7

[20]: m = np.argmax(temp) data[m, 0]

[20]: 7

3

[21]: # boolean selection

[22]: # Find all the temperatures below 70 degrees data[temp < 70, 1]

[22]: array([69, 68, 65])

[23]: # Find the months with temperatures below 70 degrees data[temp < 70, 0]

[23]: array([6, 9, 9])

[24]: np.unique(data[temp < 70, 0])

[24]: array([6, 9])

[25]: # Find all the temperatures for the month of August data[month == 8, 1]

[25]: array([85, 87, 77])

[26]: # Find the average temperature for August np.average(data[month == 8, 1])

[26]: 83.0

[27]: # Find the temperatures above 80 for June data[(month == 6) & (temp > 80), 1] # & means and

[27]: array([85, 83])

[28]: # Find the temperatures for the months of June, July, and August data[month != 9, 1]

[28]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])

[29]: data[(month == 6) | (month == 7) | (month == 8), 1]

[29]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])

[30]: data[(month >= 6) & (month <= 8), 1]

[30]: array([70, 75, 85, 90, 91, 85, 87, 83, 77, 69, 80])

[31]: # Print the average temperature for each month:
for x in [6, 7, 8, 9]:
    print(np.average(data[month == x, 1]))

4

77.4
85.33333333333333
83.0
72.0

[32]: # Find the average temperature for each month, and store it in a list: [np.average(data[month == x, 1]) for x in [6, 7, 8, 9]]

[32]: [77.4, 85.33333333333333, 83.0, 72.0]

[33]: month

[33]: array([6, 7, 6, 7, 7, 8, 8, 6, 8, 6, 9, 6, 9, 9, 9])

[34]: # Display the data set sorted by month
data[month.argsort()]

[34]: array([[ 6, 70, 3], [ 6, 85, 4], [ 6, 83, 5], [ 6, 69, 6], [ 6, 80, 6], [ 7, 75, 3], [ 7, 90, 4], [ 7, 91, 5], [ 8, 85, 2], [ 8, 87, 4], [ 8, 77, 3], [ 9, 68, 1], [ 9, 65, 3], [ 9, 75, 4], [ 9, 80, 5]])

[35]: month.argsort?

Docstring: a.argsort(axis=-1, kind=None, order=None)

Returns the indices that would sort this array.

Refer to `numpy.argsort` for full documentation.

See Also
--------
numpy.argsort : equivalent function
Type: builtin_function_or_method

5

[ ]:

6