Skip to content

Instantly share code, notes, and snippets.

@schaunwheeler
Last active May 8, 2018 17:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save schaunwheeler/541c5737c6e9b5dd97e8f5318963bba6 to your computer and use it in GitHub Desktop.
Save schaunwheeler/541c5737c6e9b5dd97e8f5318963bba6 to your computer and use it in GitHub Desktop.
"""
Code described in Medium post https://medium.com/@schaun.wheeler/codify-your-workflow-377f5f8bf4c3
Copyright (c) 2018 Valassis Digital under the terms of the BSD 3-Clause license.
"""
def add_source(self, data, label, column_name=None, **kwargs):
    """
    Turn input data into coordinate rows and store them as a named data source.

    Every input value is passed through ``self._process_input_value``. Element 0
    of its result is read as the x coordinates and element 1 as the y
    coordinates; both must be sequences of equal length. Each position in those
    sequences becomes its own output row (the original comment describes these
    as the parts of a multipolygon), carrying the metadata of the input row it
    came from.

    Parameters
    ----------
    data : DataFrame or any value accepted by ``Series(data)``
        If a DataFrame, ``column_name`` selects the column to process and all
        remaining columns are kept as metadata. Otherwise the values are
        wrapped in a Series and processed directly.
    label : hashable
        Key under which the resulting ``ColumnDataSource`` is stored in
        ``self.sources``.
    column_name : str, optional
        Column of ``data`` holding the raw values. Required when ``data`` is a
        DataFrame, ignored otherwise.
    **kwargs
        Extra metadata columns. Each value is assigned with ``df[name] = value``,
        so it may be a scalar or anything pandas can align to the rows.

    Raises
    ------
    ValueError
        If ``data`` is a DataFrame and ``column_name`` is None.
    AssertionError
        If the x and y coordinate sequences of a row differ in length.
    """
    # process data into x and y coordinates
    if isinstance(data, DataFrame):
        if column_name is None:
            raise ValueError('If data is a dataframe then column_name must be specified.')
        df = data.copy()
        keys = [c for c in df.columns if c != column_name]
    else:
        df = Series(data).to_frame('raw_data')
        keys = []
        column_name = 'raw_data'

    processed = df[column_name].apply(self._process_input_value)
    x_coords = [v[0] for v in processed]
    y_coords = [v[1] for v in processed]
    df = df.drop([column_name], axis=1)

    # add kwargs as metadata (for DataFrame input as well, so they are not
    # silently dropped from the output)
    for k, v in kwargs.items():
        df[k] = v
        if k not in keys:
            keys.append(k)

    if keys:
        index_cols = list(keys)
        index_rows = df[keys].itertuples(index=False, name=None)
    else:
        # No metadata: keep the original index labels instead. 'level_N' is the
        # column name reset_index() gives to an unnamed index level, so the
        # stored source has the same columns as before.
        index_cols = ['level_{}'.format(n) for n in range(df.index.nlevels)]
        index_rows = (ind if isinstance(ind, tuple) else (ind,) for ind in df.index)

    # in cases where data inputs are multipolygons, split out into separate
    # rows with metadata correctly associated. Rows are collected positionally
    # and the frame is built once: writing each row with .loc keyed on the
    # metadata would overwrite rows that share the same metadata values.
    records = []
    for index_vals, xs, ys in zip(index_rows, x_coords, y_coords):
        if len(xs) != len(ys):
            raise AssertionError('X and Y coordinate lists are not equal in length.')
        for i, (x, y) in enumerate(zip(xs, ys)):
            records.append(tuple(index_vals) + (i, x, y))

    vals = ['x_coords', 'y_coords']
    df2 = DataFrame(records, columns=index_cols + ['i'] + vals).set_index(index_cols + ['i'])

    self._set_coordinate_bounds(df2)
    # 'i' only numbers the parts within one input row; it is not exported.
    self.sources[label] = ColumnDataSource(df2.reset_index().drop(['i'], axis=1))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment