# rchurch4/loading_and_splitting_datasets.py

## loading_and_splitting_datasets.py
from gdtm.helpers.common import load_dated_dataset, load_split_dataset, split_dataset_by_date, save_split_dataset, month

# Location of the raw, date-stamped corpus.
# NOTE(review): this names a CSV file, yet it is also passed below as the
# `path` for saving/loading the split files -- confirm whether those helpers
# expect a directory there instead.
path_to_data = 'path/to/data/dated_tweets.csv'
dataset = load_dated_dataset(path=path_to_data, date_delimiter='\t', doc_delimiter=',')

# Split the data by month (there are epoch functions for day and week as well)
split_dataset = split_dataset_by_date(dataset, epoch_function=month)

# Save the split data set to make it easier to load in the future.
# This is useful if we are running multiple experiments on the same data.
# The split result is what gets persisted (not the raw `dataset`): the loader
# below is asked for `num_time_periods` periods, which is the size of the split.
num_time_periods = len(split_dataset.keys())
save_split_dataset(path=path_to_data, file_name='split_dataset', dataset=split_dataset, delimiter=' ')

# Load the split data set back, using the same file name and delimiter it was
# saved with.
loaded_dataset = load_split_dataset(path=path_to_data, file_name='split_dataset',
                                    num_time_periods=num_time_periods, delimiter=' ')
	from gdtm.helpers.common import load_dated_dataset, load_split_dataset, split_dataset_by_date, save_split_dataset, month

	# Location of the raw, date-stamped corpus.
	# NOTE(review): this names a CSV file, yet it is also passed below as the
	# `path` for saving/loading the split files -- confirm whether those helpers
	# expect a directory there instead.
	path_to_data = 'path/to/data/dated_tweets.csv'
	dataset = load_dated_dataset(path=path_to_data, date_delimiter='\t', doc_delimiter=',')

	# Split the data by month (there are epoch functions for day and week as well)
	split_dataset = split_dataset_by_date(dataset, epoch_function=month)

	# Save the split data set to make it easier to load in the future.
	# This is useful if we are running multiple experiments on the same data.
	# The split result is what gets persisted (not the raw `dataset`): the loader
	# below is asked for `num_time_periods` periods, which is the size of the split.
	num_time_periods = len(split_dataset.keys())
	save_split_dataset(path=path_to_data, file_name='split_dataset', dataset=split_dataset, delimiter=' ')

	# Load the split data set back, using the same file name and delimiter it was
	# saved with.
	loaded_dataset = load_split_dataset(path=path_to_data, file_name='split_dataset',
		num_time_periods=num_time_periods, delimiter=' ')