Skip to content

Instantly share code, notes, and snippets.

Last active Jan 5, 2017
What would you like to do?
import pandas as pd
import datetime
DATA_INPUT = 'data/nonpivoted_data.csv'
EPISODE_INPUT = 'data/toy_relabeled_episodes.csv'
def date(string):
    """Convert a timestamp string to a date object.

    Only the first whitespace-separated token of ``string`` is parsed, and
    it must be in ``YYYY-MM-DD`` form; anything after it (such as a time of
    day) is ignored.

    Raises:
        ValueError: if the first token does not match ``%Y-%m-%d``.
        IndexError: if ``string`` is empty or contains only whitespace.
    """
    dt = datetime.datetime.strptime(string.split()[0], '%Y-%m-%d')
    # Drop the (always midnight) time component so callers get a plain date.
    return dt.date()
# Read in the data
# NOTE(review): the file argument and the end of this call were missing from
# the source. EPISODE_INPUT is assumed to be the intended path, and the
# usecols list may originally have named more columns after 'certainty' --
# confirm against the input file.
episodes = pd.read_csv(
    EPISODE_INPUT,
    converters={'episode_date': date},
    usecols=['participant', 'episode_type', 'episode_date', 'certainty'],
)
# Load the per-participant records, keeping only the participant and the
# timestamp (parsed down to a date by the `date` converter).
# NOTE(review): the file argument and the closing parenthesis of this call
# were missing from the source; DATA_INPUT is assumed to be the intended
# path -- confirm.
data = pd.read_csv(
    DATA_INPUT,
    converters={'date_time': date},
    usecols=['participant', 'date_time'],
)
def set_episode_span(episodes, days):
    """Set each episode's begin and end dates to +/- ``days`` around its date.

    Adds (or overwrites) the ``begin_date`` and ``end_date`` columns on
    ``episodes`` in place, both computed from the ``episode_date`` column.

    Args:
        episodes: DataFrame with an ``episode_date`` column whose values
            support adding and subtracting a ``datetime.timedelta``.
        days: Number of days to extend the span on each side of the date.

    Returns:
        The same ``episodes`` object that was passed in, with the two
        columns set.
    """
    span = datetime.timedelta(days=days)
    episodes['begin_date'] = episodes['episode_date'].map(lambda ed: ed - span)
    episodes['end_date'] = episodes['episode_date'].map(lambda ed: ed + span)
    return episodes
# Use a one-week (7-day) span on each side of an episode to identify
# communications related to it.
set_episode_span(episodes, 7)

# Merge the two datasets on participant, pairing every row of `data` with
# every episode recorded for the same participant.
data = data.merge(episodes, how='left', on='participant')

# Keep only the rows whose communication date falls within the episode's
# day range.
# NOTE(review): the upper-bound half of this filter was missing from the
# source; it is reconstructed here (inclusive, mirroring the lower bound)
# from the end_date column -- confirm.
# .copy() makes the result an independent frame, so adding a column below
# does not trigger pandas' SettingWithCopyWarning.
data = data[(data['date_time'] >= data['begin_date']) &
            (data['date_time'] <= data['end_date'])].copy()

# Set the relative_date column: the communication date minus the episode date.
data['relative_date'] = data['date_time'] - data['episode_date']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment