Last active
April 24, 2023 18:04
-
-
Save shaunagm/d429ace958ee6ce1b71fbe7884611348 to your computer and use it in GitHub Desktop.
Script for "Intro to ETL" Training (Part 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#----------------------------------------------------------- | |
# Import Necessary Packages | |
#----------------------------------------------------------- | |
import json | |
from parsons import Table, MobilizeAmerica, GoogleSheets | |
from datetime import datetime | |
#----------------------------------------------------------- | |
# Instantiate Classes/Connect to Mobilize and Google Sheets | |
#----------------------------------------------------------- | |
# Parsons looks for environmental variables MOBILIZE_AMERICA_API_KEY | |
# and GOOGLE_DRIVE_CREDENTIALS when you run the following lines | |
mobilize = MobilizeAmerica() | |
google_sheets = GoogleSheets() | |
#----------------------------------------------------------- | |
# Extract data from Mobilize America and explore the table | |
#----------------------------------------------------------- | |
attendance_records = mobilize.get_attendances() | |
# Let's take a look ... | |
# Parsons Table documentation: https://move-coop.github.io/parsons/html/table | |
# You can think of it as similar to a pandas df! But it's faster | |
attendance_records | |
# Each row can be treated as a dictionary | |
for index, attendance in enumerate(attendance_records): | |
print(attendance['person']) | |
if index == 5: | |
break | |
# Check the number of rows | |
attendance_records.num_rows | |
# Check out what columns exist in the table | |
attendance_records.columns | |
#----------------------------------------------------------- | |
# Transform Mobilize data | |
#----------------------------------------------------------- | |
# Convert date column from unix to EST. | |
# Quickly define a function that converts input to a legible date string | |
def convert_to_legible_date(unix_date): | |
return datetime.utcfromtimestamp(int(unix_date)).strftime('%Y-%m-%d %H:%M:%S') | |
# Now convert all columns in timeslot_start_date column using that function | |
attendance_records.convert_column('timeslot_start_date', convert_to_legible_date) | |
# Each persons contact info is crammed into one column as a JSON string. Ew! | |
# Let's fix that | |
# First convert column value from json string to python dictionary | |
attendance_records.convert_column('person', json.loads) | |
# Now use fun ETL function to unpack the dictionary into multiple columns! | |
attendance_records.unpack_dict('person', prepend=False) | |
# Finally, we're going to break the event attendances up by | |
# the month that they happened. | |
jan_attendances = attendance_records.select_rows("'2022-01' in {timeslot_start_date}") | |
feb_attendances = attendance_records.select_rows("'2022-02' in {timeslot_start_date}") | |
mar_attendances = attendance_records.select_rows("'2022-03' in {timeslot_start_date}") | |
### Side note for advanced usage! | |
# Parsons tables are built from PETL tables, which means you can use any PETL function on a parsons table! | |
# https://petl.readthedocs.io/en/stable/ | |
# .table method converts into a petl table | |
petl_table = attendance_records.table | |
petl_table | |
# Example of a useful petl function to count signups by event -- Aggregate() | |
sign_ups_by_event_petl = petl_table.aggregate('event_title', len) | |
# Table() function converts back to parsons table | |
sign_ups_by_event_parsons = Table(sign_ups_by_event_petl) | |
#----------------------------------------------------------- | |
# Load data into Google Sheets | |
#----------------------------------------------------------- | |
spreadsheet_name = "Volunteer Attendance Records" | |
folder_id = "1y1jgygK5YUQLVrgRgNw7A8Hf2ppqOJJZ" # get from URL | |
# Creates a new spreadsheet | |
sheet_id = google_sheets.create_spreadsheet(spreadsheet_name, folder_id=folder_id) | |
# sheet_id = # get from command line for re-running script | |
# Use the ID to update the spreadsheet | |
google_sheets.overwrite_sheet(sheet_id, jan_attendances) | |
google_sheets.overwrite_sheet(sheet_id, feb_attendances) | |
# Note: if you want to be fancy, the optional parameter value_input_option = True | |
# allows you to supply data as formulas, not just raw values | |
# Overwrite erases any existing data. You can also append to the end of a sheet | |
google_sheets.append_to_sheet(sheet_id, feb_attendances) | |
google_sheets.append_to_sheet(sheet_id, mar_attendances) | |
# You can even format cells! | |
red = {"red": 1.0, "green": 0.0, "blue": 0.0} | |
google_sheets.format_cells(sheet_id, "A1", {"backgroundColor": red}, worksheet=0) | |
# Let's mark all the people who failed to attend in red. | |
google_sheets.overwrite_sheet(sheet_id, attendance_records) # overwrite sheet | |
for index, row in enumerate(attendance_records): | |
adjusted_index = index + 2 # accounts for python zero-indexing and header row | |
if row["attended"] == "false": | |
cell_range = f"A{adjusted_index}:N{adjusted_index}" | |
google_sheets.format_cells(sheet_id, cell_range, {"backgroundColor": red}, worksheet=0) | |
# NOTE: the Parsons connector is more limited than the Google Sheets API. If you want to use something on the Google Sheets API, | |
# you can access the client. Let's mark the people who failed to attend in red again, this time using the underlying client | |
# to do so. | |
google_sheets.overwrite_sheet(sheet_id, attendance_records) # overwrite sheet | |
worksheet = google_sheets.gspread_client.open(spreadsheet_name).sheet1 # get client's worksheet object | |
for index, row in enumerate(attendance_records): | |
adjusted_index = index + 2 # accounts for python zero-indexing and header row | |
if row["attended"] == "false": | |
cell_range = f"A{adjusted_index}:N{adjusted_index}" | |
worksheet.format(cell_range, {"backgroundColor": red}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment