Skip to content

Instantly share code, notes, and snippets.

@willprice
Last active February 12, 2020 16:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willprice/9bf576f0e928dcf6859fe8386404c7cd to your computer and use it in GitHub Desktop.
Save willprice/9bf576f0e928dcf6859fe8386404c7cd to your computer and use it in GitHub Desktop.
Script to read Charades CSV files (Charades_v1_train.csv, Charades_v1_test.csv) into a pandas DataFrame where each row represents a single action
import pandas as pd
from csv import DictReader
from collections import defaultdict
import numpy as np
def read_charades_csv(path):
    """Read a Charades annotation CSV into a DataFrame with one row per action.

    Every CSV row whose ``actions`` cell is non-empty is expanded into one
    output row per action listed in that cell; all other cells of the CSV row
    are repeated on each of those output rows.  CSV rows with an empty
    ``actions`` cell contribute no output rows.

    Args:
        path: Path of the CSV file.  It must have a header row containing an
            ``actions`` column whose cells look like
            ``"c092 11.90 21.20;c147 0.00 12.60"`` (``;``-separated
            ``<label> <start> <stop>`` triples).

    Returns:
        A ``pandas.DataFrame`` whose columns are the CSV columns (in header
        order, without ``actions``) followed by:

        * ``action``     -- the label with its first character dropped,
          converted to ``int``;
        * ``start_time`` -- ``float`` start of the action;
        * ``stop_time``  -- ``float`` end of the action.

        ``length`` is parsed as ``float`` and ``quality`` / ``relevance`` as
        ``int`` (``NaN`` when the cell is empty); ``objects`` is split on
        ``;`` into a list (empty list when the cell is empty).  All remaining
        columns are kept as the raw strings read from the file.

    Raises:
        KeyError: If a data row is read from a file without an ``actions``
            column.
        ValueError: If an action triple or a numeric cell cannot be parsed.
    """

    def parse_action(action_str):
        # One annotation is "<label> <start> <stop>"; the label's leading
        # character is a prefix in front of the numeric class id.
        cls, start, stop = action_str.split()
        return {
            'class': int(cls[1:]),
            'start': float(start),
            'stop': float(stop),
        }

    def parse_actions(actions_str):
        # Skip empty segments so a stray trailing ';' does not blow up the
        # three-way unpacking in parse_action.
        return [
            parse_action(segment)
            for segment in actions_str.split(';')
            if segment.strip()
        ]

    def maybe_null(reader_fn):
        # Wrap a converter so that a missing cell becomes NaN instead of
        # raising.  DictReader yields None (not '') for cells absent from a
        # short row, so both spellings count as missing.
        def reader(val):
            if val is None or (isinstance(val, str) and len(val) == 0):
                return np.nan
            return reader_fn(val)
        return reader

    def parse_objects(objects_str):
        # ''.split(';') would give [''], i.e. one bogus empty object name.
        if not objects_str:
            return []
        return objects_str.split(';')

    readers = {
        'objects': parse_objects,
        'length': maybe_null(float),
        'quality': maybe_null(int),
        'relevance': maybe_null(int),
    }

    # newline='' is what the csv module asks for so that newlines embedded in
    # quoted cells are handled by the reader rather than by the file object.
    with open(path, 'r', newline='') as f:
        reader = DictReader(f)
        # Use the header order (de-duplicated) rather than iterating a set:
        # set order depends on string hashing and so differs between runs.
        passthrough_cols = [
            name
            for name in dict.fromkeys(reader.fieldnames or [])
            if name != 'actions'
        ]
        # Pre-seeding every column keeps the schema even when no row has any
        # action, and fixes the column order of the resulting frame.
        columns = {name: [] for name in passthrough_cols}
        columns['action'] = []
        columns['start_time'] = []
        columns['stop_time'] = []

        for row in reader:
            actions_str = row['actions']
            if not actions_str:
                continue
            actions = parse_actions(actions_str)
            action_count = len(actions)
            for col in passthrough_cols:
                convert = readers.get(col)
                value = row[col] if convert is None else convert(row[col])
                # NOTE: for 'objects' the same list object is repeated on
                # every action row of this video.
                columns[col].extend([value] * action_count)
            columns['action'].extend([a['class'] for a in actions])
            columns['start_time'].extend([a['start'] for a in actions])
            columns['stop_time'].extend([a['stop'] for a in actions])

    return pd.DataFrame(columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment