Last active
May 8, 2018 17:13
-
-
Save schaunwheeler/541c5737c6e9b5dd97e8f5318963bba6 to your computer and use it in GitHub Desktop.
Snapshot of `add_source` method from https://gist.github.com/schaunwheeler/9a98d8bee5f039e9872c76fb24a6e69c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Code described in Medium post https://medium.com/@schaun.wheeler/codify-your-workflow-377f5f8bf4c3
Copyright (c) 2018 Valassis Digital under the terms of the BSD 3-Clause license.
""" | |
def add_source(self, data, label, column_name=None, **kwargs):
    """Convert ``data`` into per-shape coordinate rows and store them under ``label``.

    Every input value is passed through ``self._process_input_value``; element 0
    of the result is used as the x coordinates and element 1 as the y
    coordinates. Both are expected to be sequences with one entry per shape, so
    a value that yields several shapes (e.g. a multipolygon) is split into one
    output row per shape, each carrying the metadata of the row it came from.

    Args:
        data: Either a ``DataFrame`` (the values to process are read from
            ``column_name`` and every other column is kept as metadata) or
            anything accepted by ``Series(data)``.
        label: Key under which the resulting ``ColumnDataSource`` is stored in
            ``self.sources``.
        column_name: Column of ``data`` holding the values to process. Required
            when ``data`` is a ``DataFrame``; ignored otherwise.
        **kwargs: Extra metadata. Each item is assigned to the working frame as
            ``df[name] = value`` and kept as a metadata column of the output.

    Raises:
        ValueError: If ``data`` is a ``DataFrame`` and ``column_name`` is None.
        AssertionError: If a processed value has a different number of x and y
            coordinate entries.
    """
    # process data into x and y coordinates
    if isinstance(data, DataFrame):
        if column_name is None:
            raise ValueError('If data is a dataframe then column_name must be specified.')
        df = data.copy()  # never mutate the caller's frame
        keys = [c for c in df.columns if c != column_name]
    else:
        df = Series(data).to_frame('raw_data')
        keys = []
        column_name = 'raw_data'

    # Keep the intermediate result in a local Series instead of a scratch
    # column so it cannot collide with a metadata column of the same name.
    processed = df[column_name].apply(self._process_input_value)
    df['x_coords'] = processed.apply(lambda v: v[0])
    df['y_coords'] = processed.apply(lambda v: v[1])
    df = df.drop([column_name], axis=1)

    # add kwargs as metadata
    for k, v in kwargs.items():
        df[k] = v
        if k not in keys:
            # Register the kwarg as a metadata key for DataFrame input too, so
            # it ends up in the output instead of being silently discarded.
            keys.append(k)

    # The metadata keys (if any) become the index of both the working frame and
    # the output frame; 'i' is the position of a shape within its input value.
    vals = ['x_coords', 'y_coords']
    if keys:
        df = df.set_index(keys)
        df2 = DataFrame(columns=keys + ['i'] + vals).set_index(keys + ['i'])
    else:
        # No metadata: keep the default row index and append 'i' as a second
        # level so the (row, i) labels built below still match the index.
        df2 = DataFrame(columns=['i'] + vals).set_index('i', append=True)

    # in cases where data inputs are multipolygons, split out into separate rows
    # with metadata correctly associated
    for ind, row in df.iterrows():
        xs, ys = row['x_coords'], row['y_coords']
        if len(xs) != len(ys):
            raise AssertionError('X and Y coordinate lists are not equal in length.')
        for i, (x, y) in enumerate(zip(xs, ys)):
            # A multi-key index yields tuple labels, a single key yields a scalar.
            ind2 = ind + (i,) if isinstance(ind, tuple) else (ind, i)
            # NOTE(review): presumably the placeholder write creates the row
            # first so that the list-valued cells are then set on an existing
            # row rather than via setting-with-enlargement -- confirm before
            # collapsing these two assignments into one.
            df2.loc[ind2, vals] = None, None
            df2.loc[ind2, vals] = x, y

    self._set_coordinate_bounds(df2)
    # 'i' was only needed to keep the rows distinct while building the frame.
    self.sources[label] = ColumnDataSource(df2.reset_index().drop(['i'], axis=1))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment