Skip to content

Instantly share code, notes, and snippets.

@franklinokech
Last active June 4, 2020 09:06
Show Gist options
  • Save franklinokech/96f65be38970747cfdde69c8a353e26e to your computer and use it in GitHub Desktop.
Save franklinokech/96f65be38970747cfdde69c8a353e26e to your computer and use it in GitHub Desktop.
This gist contains key data-wrangling snippets for Python pandas.
# Insert a new column named 'A' at position `idx` (0 puts it first).
# `new_col` can be a list, a Series, an array or a scalar.
idx = 0
new_col = [7, 8, 9]
df.insert(idx, 'A', new_col)
# Build 'composite_column' as "<string_col>-<date_column>".
# The date column is cast to str first so it can be concatenated with text.
date_as_text = df['date_column'].astype(str)
df['composite_column'] = df['string_col'] + '-' + date_as_text
# Convert a pandas column to the datetime data type.
# The values are parsed with format '%d-%m-%Y' (day-month-year).
# Bracket indexing is used instead of attribute access so the snippet keeps
# working when the column name is not a valid Python identifier or collides
# with an existing DataFrame attribute/method.
df['date_column'] = pd.to_datetime(df['date_column'], format='%d-%m-%Y')
# Convert the text values of a given column to lower case.
# Bracket indexing is used instead of attribute access so the snippet keeps
# working when the column name is not a valid Python identifier or collides
# with an existing DataFrame attribute/method.
df['column_name'] = df['column_name'].str.lower()
# Write a DataFrame to a Google Sheet from a Colab notebook.
import gspread
from google.colab import auth
from gspread_dataframe import get_as_dataframe, set_with_dataframe
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user, then authorise gspread with the
# application-default credentials.
auth.authenticate_user()
credentials = GoogleCredentials.get_application_default()
gc = gspread.authorize(credentials)

# Open the spreadsheet file by its title, then select one tab (worksheet)
# inside it, also by title.
sh = gc.open('Google Sheet File Name')
worksheet = sh.worksheet('Tab within File')

# Write the DataFrame into the selected worksheet.
# NOTE(review): set_with_dataframe is called with its default arguments; per
# the gspread-dataframe docs it anchors the data at the top-left cell rather
# than appending below existing rows -- confirm that is the intended behaviour.
set_with_dataframe(worksheet, df)
# Relative frequency of each distinct value in column 'City';
# normalize=True yields fractions, which are scaled by 100 when printed.
city_values = empDfObj['City']
frequency = city_values.value_counts(normalize=True)
print(
    "Frequency of values as percentage in column 'City' :",
    frequency * 100,
    sep='\n',
)
# Left join: keep every row of df_left and attach the matching rows of
# df_right, matching on the 'primary_key' column of both frames.
df_merged = df_left.merge(
    df_right,
    how='left',
    left_on='primary_key',
    right_on='primary_key',
)
# Prepend a fixed string to every value of a column.
# The column is cast to str first so the concatenation works for any dtype.
prefix = 'str'
df['col'] = prefix + df['col'].astype(str)
# Remove leading and trailing whitespace from column names.
# Non-string labels (e.g. integer column names) have no .strip() method, so
# they are kept unchanged instead of raising AttributeError.
df.columns = [
    label.strip() if isinstance(label, str) else label
    for label in df.columns
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment