# datarocks/hashtag_stack.py

## hashtag_stack.py
import tablib
from copy import deepcopy

# Pull the data in using tablib, a really great library for messing around with tabular data
# (more details about tablib here: http://docs.python-tablib.org/en/latest/).
# For serious data analysis, you are going to need to mess around with pandas: http://pandas.pydata.org/
# Read the CSV export inside a context manager so the file handle is closed
# as soon as its contents have been handed to tablib.
with open('picodash_instagram_nra_blog_2016-12-14.csv') as csv_file:
    data = tablib.Dataset().load(csv_file.read())

# Headers for the stacked dataset: every original column plus a new "hashtag"
# column. Build a fresh list instead of appending to data.headers, which would
# silently modify the headers of the source dataset as well.
headers = list(data.headers) + [u'hashtag']

# The new dataset we are building, with the headers set
stacked_data = tablib.Dataset(headers=headers)

# "Stack" the data: emit one output row per (input row, hashtag) pair, so a
# post with three comma-separated hashtags becomes three rows.
for row in data.dict:
    # The original cell values, in header order; they are reused unchanged
    # for every hashtag of this row, so no per-hashtag copy is needed.
    base_values = tuple(row.values())
    for hashtag in row['Hashtags'].split(','):
        # Trim surrounding whitespace so "a, b" yields "b" rather than " b".
        # A row with an empty Hashtags cell still produces one row with an
        # empty hashtag, so no post is dropped from the output.
        stacked_data.append(base_values + (hashtag.strip(),))

# export the dataset as a new excel document
with open('hashtag_data.xlsx', 'wb') as f:
    f.write(stacked_data.xlsx)