Skip to content

Instantly share code, notes, and snippets.

View zero731's full-sized avatar

Max Steele zero731

View GitHub Profile
## Bin registered voters into age-decade groups using pd.cut
# Define group labels (one label per decade-wide bin, youngest to oldest)
cut_labels = ['Teens', "20's", "30's", "40's", "50's", "60's", "70's", "80's", "90's", "100's"]
# Define bin edges: 10, 20, ..., 110 (11 edges -> 10 bins, matching the 10 labels)
cut_bins = np.arange(10, 111, 10)
# Create a new column grouping age into decades.
# right=False makes every bin left-closed -- [10, 20), [20, 30), ... -- so an
# age of exactly 20 is labeled "20's" instead of 'Teens' (pd.cut's default
# right-closed bins would put each decade boundary in the younger group).
# Ages below 10 or at/above 110 fall outside all bins and become NaN.
df['cut_age'] = pd.cut(df['age'], bins=cut_bins, labels=cut_labels, right=False)
## Bin registered voters by age decade using pd.cut, keeping the default interval labels
# Bin edges every ten years, from 10 up to and including 110
cut_bins = np.arange(10, 111, step=10)
# No labels are passed, so each value in the new column is the interval
# (right-closed by default, e.g. (10, 20]) that contains the row's age
df['cut_age'] = pd.cut(x=df['age'], bins=cut_bins)
## Assign each registered voter to a generation using pd.cut
# Bin edges are birth years. pd.cut's default bins are right-closed, so a
# birth year equal to an edge (e.g. 1945) belongs to the older group.
cut_bins = [0, 1945, 1964, 1980, 1996, 2100]
# One label per bin, ordered from the earliest birth years to the latest
cut_labels = ['Greatest-Silent', 'Boomer', 'GenX', 'Millennial', 'GenZ']
# Store the generation of each birth_year in a new column
df['cut_generation'] = pd.cut(x=df['birth_year'], bins=cut_bins, labels=cut_labels)
# Build the 'generation' column by calling get_gen_grp on every birth_year value
df['generation'] = df['birth_year'].apply(get_gen_grp)
## Define function for grouping into generation categories by birth year
# Takes one birth year and returns a generation label string:
#   birth_year < 1946          -> 'Greatest-Silent'
#   1945 < birth_year < 1965   -> 'Boomer'
# NOTE(review): this snippet is cut off after the third condition
# (1964 < birth_year < 1981), so the remaining return values are not visible
# here; the body's indentation was also lost in this copy.
# NOTE(review): `&` works on these scalar booleans, but `and` is the
# conventional operator for non-array comparisons.
def get_gen_grp(birth_year):
if birth_year < 1946:
return 'Greatest-Silent'
elif (birth_year > 1945) & (birth_year < 1965):
return 'Boomer'
elif (birth_year > 1964) & (birth_year < 1981):
# Insert a 'rvw_len' column at position 2 holding the length of each review.
# .str.len() is vectorized and yields NaN for a missing review, whereas
# mapping the builtin len() over the column raises TypeError on NaN.
rvws_slice.insert(2, 'rvw_len', rvws_slice['review'].str.len())
# Preview the first rows to confirm the new column
rvws_slice.head()
# Add an 'as_rvw' column set to 1 on every row
# NOTE(review): presumably a flag marking these rows as reviews -- confirm against later use
rvws_slice['as_rvw'] = 1
# Preview the first rows to confirm the new column
rvws_slice.head()
# Select four columns from rvw_df into a working DataFrame.
# .copy() makes rvws_slice independent of rvw_df, so adding or modifying
# columns on it later does not raise SettingWithCopyWarning and can never
# write through to rvw_df.
rvws_slice = rvw_df[['app_id', 'review', 'rating', 'date']].copy()
# Preview the first rows of the selection
rvws_slice.head()
# Normalize column names in one pass: lowercase them, then replace spaces with underscores
app_df.columns = [name.lower().replace(' ', '_') for name in app_df.columns]