Last active
July 13, 2022 21:13
-
-
Save shaunagm/b99142cd820b87250e869201ee56012c to your computer and use it in GitHub Desktop.
Script for Day 2 of Intro to Parsons Training
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################## | |
### Get data from Mobilize ### | |
############################## | |
from parsons import MobilizeAmerica | |
"""Don't forget to set the MOBILIZE_AMERICA_API_KEY environment variable""" | |
# Connector object for the Mobilize America API.
mobilize = MobilizeAmerica()
# Only necessary since we're using a "developer sandbox": point the connector at the staging API.
mobilize.uri = "https://staging-api.mobilize.us/v1/"
# people = mobilize.get_people(organization_id=182) | |
# print(people) | |
"""The data returned from Mobilize (or any other connector) is stored as a Parsons Table. We can explore | |
our Parsons table with a variety of helper functions.""" | |
# print(people.columns) | |
# print(people.num_rows) | |
"""Parsons is built on top of the PETL library, which has its own helper functions. You can access the | |
underlying PETL methods with .table:""" | |
# print(people.table.look(limit=10)) # 'look' displays the first x rows | |
###################### | |
### Transform Data ### | |
###################### | |
"""Mobilize provides phone, email, and zip codes as lists of dictionaries with the structure:
[{"primary": True, "number": "444-444-4444"}, {"primary": False, "number": "555-555-5555"}] | |
But other platforms may want a single string for phone/zip/email contacts. Luckily Parsons can help | |
us reformat!""" | |
def get_primary_contact_or_first(contact_list):
    """Collapse a Mobilize-style contact list into a single contact value.

    Each entry is expected to be a dict that stores its value under one of the
    keys "number", "postal_code" or "address", usually next to a "primary" flag.

    Args:
        contact_list: Iterable of contact dicts. May be None or empty.

    Returns:
        The value of the first entry flagged as primary; otherwise the value of
        the first entry that has a recognised key; otherwise None.
    """
    # When an entry carries more than one known key, the later check used to
    # win ("address" over "postal_code" over "number"); keep that precedence.
    selectors = ("address", "postal_code", "number")
    fallback = None
    have_fallback = False
    for contact in contact_list or []:
        selector = next((key for key in selectors if key in contact), None)
        if selector is None:
            # Nothing we know how to extract from this entry; skip it rather
            # than reuse a selector left over from a previous entry.
            continue
        value = contact[selector]
        # A missing "primary" flag is treated as "not primary".
        if contact.get("primary"):
            return value
        if not have_fallback:
            fallback = value
            have_fallback = True
    return fallback
"""The convert_column method applies the function row-by-row to the cells in the specified column. The | |
original cell value is passed in as the function input, and whatever the function returns is the new | |
cell value.""" | |
# people.convert_column('phone_numbers', get_primary_contact_or_first) | |
# people.convert_column('postal_addresses', get_primary_contact_or_first) | |
# people.convert_column('email_addresses', get_primary_contact_or_first) | |
"""Mobilize provides the "created_date" for a person record as a unix timestamp. This is a quirky but | |
widely used format that describes a point in time by the number of seconds that have elapsed since | |
January 1st, 1970. We can use the datetime library and the convert_column function to translate the | |
unix timestamp into something more readable.""" | |
from datetime import datetime | |
def convert_to_legible_date(unix_date):
    """Format a unix timestamp as a 'YYYY-MM-DD HH:MM:SS' string in UTC.

    Args:
        unix_date: Seconds elapsed since January 1st, 1970 (UTC), given as an
            int or as any value that int() accepts (e.g. a numeric string).

    Returns:
        The formatted UTC date and time as a string.
    """
    # Function-scope import: the file only imports `datetime` from the module.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime produces exactly the same formatted text.
    moment = datetime.fromtimestamp(int(unix_date), tz=timezone.utc)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
# people.convert_column("created_date", convert_to_legible_date) | |
"""You can also do a simple column rename.""" | |
# people.rename_column("created_date", "date_added") | |
"""In addition to manipulating the column data, you can select specific rows to create a data subset. | |
There are two syntaxes for doing this: expression strings (written like f-strings, with the column
name in curly braces) and lambda functions. Lambda functions are more flexible but harder to reason
about, so I usually use the f-string-style syntax if I can."""
# created_this_year = people.select_rows("'2022-' in {created_date}") # f-string | |
# has_phone_number = people.select_rows(lambda row: row.phone_numbers) # lambda function | |
"""We can also use Parsons to add new columns to our dataset. For instance, what if we wanted to get | |
people who had come to events and then upload them to a new platform (in our case, Action Network) | |
along with a tag saying what event they'd signed up for? """ | |
# attendances = mobilize.get_attendances(organization_id=182) | |
"""This gives us a series of rows with an event + attendee pair. To see the structure of the returned | |
object, we can print the table, but that's still hard to read. Let's use the json package, which lets | |
us print a dict with indenting. Maybe a pretty=False flag in to_dicts().""" | |
# import json | |
# print(json.dumps(attendances.to_dicts()[:3], indent=4, sort_keys=True)) | |
"""First, let's remove all columns except 'event', 'person', and 'timeslot':""" | |
# attendances = attendances.cut(["event", "person", "timeslot"]) | |
"""Then we can combine the data in the event and timeslot columns into a simple string, our new tag.
Note that unlike convert_column, which feeds in the cell for that particular column to the converter
function, add_column feeds in the whole row of data. So you'll want to use it whenever you're working
with data from multiple columns."""
def event_object_to_tag(row):
    """Build a tag string for one attendance row.

    The tag contains the event's title, its type, and the start date of the
    row's timeslot (or 'date unknown' if no timeslot or start date exists).

    Args:
        row: A row with an "event" mapping (keys "title" and "event_type")
            and a "timeslot" entry that is either a mapping with an optional
            "start_date" unix timestamp, or None.

    Returns:
        A string of the form "<title> - <event_type> - <date>".
    """
    # The timeslot itself may be missing (None) or lack a start date; both
    # cases fall back to 'date unknown' instead of raising.
    timeslot = row["timeslot"] or {}
    start_date = timeslot.get("start_date")
    if start_date:
        when = convert_to_legible_date(start_date)
    else:
        when = "date unknown"
    event = row["event"]
    return f"{event['title']} - {event['event_type']} - {when}"
# attendances.add_column('event_tag', event_object_to_tag) | |
# print(json.dumps(attendances.to_dicts()[:3], indent=4, sort_keys=True)) | |
"""We can get rid of the event and timeslot columns now, leaving just "person" and our newly | |
created "event_tag":""" | |
# attendances = attendances.cut(["person", "event_tag"]) | |
"""We can now "unpack" the nested person object, turning the keys of the nested dictionary into their | |
own columns:""" | |
# attendances.unpack_dict("person", prepend=None) # by default, prepend is set to the column name, ie, person_phone_numbers | |
############################### | |
### Upload to ActionNetwork ### | |
############################### | |
"""It's time to upload our people to Action Network. First, let's import and initialize our connector:""" | |
from parsons import ActionNetwork | |
action_network = ActionNetwork() # don't forget to set the AN_API_TOKEN environmental variable! | |
"""Then, we'll iterate through our Mobilize people and add each person. (Typically you'll want to check | |
if the person already exists, but let's keep things simple.)""" | |
tag_list = []  # If you might duplicate existing tags, prepopulate with action_network.get_tags()
for record in attendances:
    event_tag = record["event_tag"]
    if event_tag not in tag_list:
        # NOTE: you can't delete tags in Action Network, so be careful when doing this with real data
        action_network.add_tag(event_tag)
        # Remember the tag so later attendees of the same event don't try to create it again.
        tag_list.append(event_tag)
    # Every attendance record is uploaded, whether or not its tag was just created.
    # NOTE(review): the contact fields are passed through exactly as they appear in the table;
    # confirm they are in the shape add_person expects (e.g. after the convert_column steps).
    action_network.add_person(
        email_address=record["email_addresses"],
        given_name=record["given_name"],
        family_name=record["family_name"],
        postal_addresses=record["postal_addresses"],
        mobile_number=record["phone_numbers"],
        # The connector documents `tags` as a list of tag names, so wrap the single tag.
        tags=[event_tag],
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment