Skip to content

Instantly share code, notes, and snippets.

@erochest
Last active January 5, 2017 19:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save erochest/152227b0684dcdcbb8e096661ad92bcf to your computer and use it in GitHub Desktop.
Save erochest/152227b0684dcdcbb8e096661ad92bcf to your computer and use it in GitHub Desktop.
import pandas as pd
import datetime
# Path (relative to the working directory) of the CSV whose 'participant' and
# 'date_time' columns are loaded into ``data`` further down.
DATA_INPUT = 'data/nonpivoted_data.csv'
# Path (relative to the working directory) of the CSV whose episode columns
# are loaded into ``episodes`` further down.
EPISODE_INPUT = 'data/toy_relabeled_episodes.csv'
def date(string):
    """Convert a string to a date object.

    Only the first whitespace-delimited token of *string* is parsed, so a
    trailing time component (for example ``'2016-01-05 12:34:56'``) is
    ignored. The token must be in ``YYYY-MM-DD`` form.

    This function is used as a ``converters=`` callback for ``pd.read_csv``,
    which hands it the raw cell text.

    Args:
        string: Text whose first token is a ``YYYY-MM-DD`` date.

    Returns:
        The parsed ``datetime.date``.

    Raises:
        ValueError: If *string* is empty or whitespace-only, or if its first
            token does not match ``%Y-%m-%d``.
    """
    tokens = string.split(None, 1)
    if not tokens:
        # An empty CSV cell reaches a converter as ''. Report it with the same
        # exception type strptime uses for malformed text instead of letting
        # an unexplained IndexError escape from the indexing below.
        raise ValueError(
            'cannot parse a date from blank value {!r}'.format(string)
        )
    return datetime.datetime.strptime(tokens[0], '%Y-%m-%d').date()
# Load the episode table and the communication table. Each date-bearing
# column is routed through ``date`` so its values arrive as ``datetime.date``
# objects, and ``usecols`` restricts each frame to the columns listed.
episodes = pd.read_csv(
    EPISODE_INPUT,
    usecols=[
        'participant',
        'episode_type',
        'episode_date',
        'certainty',
        'episode_num',
    ],
    converters={'episode_date': date},
)
data = pd.read_csv(
    DATA_INPUT,
    usecols=['participant', 'date_time'],
    converters={'date_time': date},
)
def set_episode_span(episodes, days):
    """Add a symmetric date window around each episode's date.

    Two columns are written onto *episodes* itself:

    * ``begin_date`` -- ``episode_date`` minus *days* days
    * ``end_date``   -- ``episode_date`` plus *days* days

    Args:
        episodes: Frame with an ``episode_date`` column. It is modified in
            place.
        days: Half-width of the window, passed to
            ``datetime.timedelta(days=...)``.

    Returns:
        The same *episodes* object that was passed in, now carrying the two
        extra columns.
    """
    span = datetime.timedelta(days=days)
    episode_dates = episodes['episode_date']
    # When loaded through the ``date`` converter the column holds plain
    # ``datetime.date`` objects, so each value is shifted individually.
    episodes['begin_date'] = episode_dates.map(lambda day: day - span)
    episodes['end_date'] = episode_dates.map(lambda day: day + span)
    return episodes
# Give every episode a window of seven days on either side of its date; a
# communication is treated as related to an episode when its date falls
# inside that window.
set_episode_span(episodes, 7)

# Pair each communication row with every episode of the same participant.
data = data.merge(episodes, how='left', on='participant')

# Keep only the pairs whose communication date lies within the episode's
# window (both ends inclusive). The explicit copy makes the filtered frame
# independent of the merged one, so the column assignment below does not
# trigger pandas' SettingWithCopyWarning.
data = data[
    (data['date_time'] >= data['begin_date'])
    & (data['date_time'] <= data['end_date'])
].copy()

# Offset of the communication date from the episode date.
data['relative_date'] = data['date_time'] - data['episode_date']

print(data.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment