# shanealynn/Pandas index - loc selection examples.py

## Pandas index - loc selection examples.py

# Boolean-mask selection examples for DataFrame.loc.
# NOTE(review): `data` is not defined in this snippet; it is assumed to be an
# existing pandas DataFrame containing the columns referenced below.

# Select rows with first name 'Antonio', and all columns from 'city' through
# 'email' (a label slice in .loc includes both endpoints).
data.loc[data['first_name'] == 'Antonio', 'city':'email']

# Select rows where the email column ends with 'hotmail.com', include all columns
data.loc[data['email'].str.endswith("hotmail.com")]

# Select rows whose first_name is one of the listed values, all columns
data.loc[data['first_name'].isin(['France', 'Tyisha', 'Eric'])]

# Select rows with first name 'Antonio' AND an email ending in 'gmail.com'.
# NOTE(review): the earlier comment said "hotmail" while the code filters on
# "gmail.com"; the comment now follows the code - confirm which was intended.
data.loc[data['email'].str.endswith("gmail.com") & (data['first_name'] == 'Antonio')]

# Select rows with id greater than 100 and at most 200 (lower bound exclusive,
# upper bound inclusive), and return just the 'postal' and 'web' columns
data.loc[(data['id'] > 100) & (data['id'] <= 200), ['postal', 'web']]

# A lambda function that yields True/False values can also be used.
# Select rows where the company name splits into exactly 4 parts on single spaces.
data.loc[data['company_name'].apply(lambda x: len(x.split(' ')) == 4)]

# Selections can be built outside of the main .loc for clarity:
# Form a separate variable holding the boolean mask:
idx = data['company_name'].apply(lambda x: len(x.split(' ')) == 4)
# Select only the rows where 'idx' is True and only the 3 columns specified.
# The column is 'company_name' (as used for the mask above); asking .loc for a
# label that does not exist in a list of labels raises KeyError.
data.loc[idx, ['email', 'first_name', 'company_name']]

	# Boolean-mask selection examples for DataFrame.loc.
	# NOTE(review): `data` is not defined in this snippet; it is assumed to be an
	# existing pandas DataFrame containing the columns referenced below.

	# Select rows with first name 'Antonio', and all columns from 'city' through
	# 'email' (a label slice in .loc includes both endpoints).
	data.loc[data['first_name'] == 'Antonio', 'city':'email']

	# Select rows where the email column ends with 'hotmail.com', include all columns
	data.loc[data['email'].str.endswith("hotmail.com")]

	# Select rows whose first_name is one of the listed values, all columns
	data.loc[data['first_name'].isin(['France', 'Tyisha', 'Eric'])]

	# Select rows with first name 'Antonio' AND an email ending in 'gmail.com'.
	# NOTE(review): the earlier comment said "hotmail" while the code filters on
	# "gmail.com"; the comment now follows the code - confirm which was intended.
	data.loc[data['email'].str.endswith("gmail.com") & (data['first_name'] == 'Antonio')]

	# Select rows with id greater than 100 and at most 200 (lower bound exclusive,
	# upper bound inclusive), and return just the 'postal' and 'web' columns
	data.loc[(data['id'] > 100) & (data['id'] <= 200), ['postal', 'web']]

	# A lambda function that yields True/False values can also be used.
	# Select rows where the company name splits into exactly 4 parts on single spaces.
	data.loc[data['company_name'].apply(lambda x: len(x.split(' ')) == 4)]

	# Selections can be built outside of the main .loc for clarity:
	# Form a separate variable holding the boolean mask:
	idx = data['company_name'].apply(lambda x: len(x.split(' ')) == 4)
	# Select only the rows where 'idx' is True and only the 3 columns specified.
	# The column is 'company_name' (as used for the mask above); asking .loc for a
	# label that does not exist in a list of labels raises KeyError.
	data.loc[idx, ['email', 'first_name', 'company_name']]