Skip to content

Instantly share code, notes, and snippets.

// Fetch the current Instagram Stories for an account from the Facebook Graph API (v4.0).
// NOTE(review): incomplete fragment — the function continues past this excerpt.
// Assumes `insta_id` and `user_access_token` are defined elsewhere; `user_access_token`
// presumably already contains the "access_token=..." key/value pair — TODO confirm.
function getStoryInsights() {
// Stories endpoint for this account; limit raised to 2000 items per request.
var stories = "https://graph.facebook.com/v4.0/" + insta_id + "/stories?" + user_access_token + "&limit=2000";
// Google Apps Script HTTP client.
var response = UrlFetchApp.fetch(stories);
var data = JSON.parse(response.getContentText());
//For each media object:
// Get the ids to be reversed
var id_arr_raw = [];
#Plot per-hour mean impressions by weekday with a linear trend per day.
#NOTE(review): relies on `hourly_means`, `day_names`, `poly`, and `ticker`
#defined outside this excerpt; the gist scrape is out of source order.
#Get impressions column for each day
unstacked = hourly_means['impressions'].unstack().T
#Get indices for non-zero xs ('exes') and ys ('whys') at each day
#Zero rows are hours with no data (filled in elsewhere) — exclude them from the fit
xs = {day: np.array(unstacked[day][unstacked[day] != 0].index) for day in day_names}
ys = {day: unstacked[day][unstacked[day] != 0].values for day in day_names}
#Calculate trends for each set of x and y
#polyfit(..., 1) fits a degree-1 (linear) trend; polyval evaluates it at each x
ffits = {day: poly.polyval(xs[day], poly.polyfit(xs[day], ys[day], 1)) \
for day in day_names}
#Plot hourly means
ax = hourly_means.plot(figsize = (16, 8), x_compat = True)
#Create minor and major ticks to record hours and days
#Hours:
hour_mult = 4 #multiple locator: keep one tick label per 4 hours
#Get ticks only for the hours set by the multiple locator
#Leading '' presumably aligns labels with the locator's tick positions — TODO confirm
hour_ticks = [''] + [tup[1] for i, tup in enumerate(hourly_means.index) if i % hour_mult == 0]
ax.xaxis.set_minor_locator(ticker.MultipleLocator(hour_mult))
ax.xaxis.set_minor_formatter(ticker.FixedFormatter(hour_ticks))
#Build per-hour means from summed metrics, then prepare `df` for hourly grouping.
#NOTE(review): relies on `hourly_mi` and `df` defined outside this excerpt.
#Divide each column by its number of stories to get the mean
#(this also divides 'count' by itself, leaving that column as 1s / NaN->0)
hourly_means = pd.DataFrame({col: hourly_mi[col] / hourly_mi['count'] for col in hourly_mi.columns}).fillna(0)
#Reset timestamps to be hour-long periods for filling in those periods with no data:
#NOTE(review): the literals are UTC offsets (+0000) localized to US/Eastern —
#16:00 UTC is noon EDT and 04:00 UTC is 11pm EST, matching the inline comments.
start = pd.Timestamp('2019-10-27 16:00:00+0000', tz='US/Eastern') #noon eastern
end = pd.Timestamp('2019-12-15 04:00:00+0000', tz='US/Eastern') #11pm eastern
#Create a timeframe to use for reindexing
timeframe = pd.period_range(start = start, end = end, freq = 'H')
#Set the df index to periods
df.set_index(df.index.to_period(freq = 'H'), inplace = True)
#Create a count column for use in group_by
#Each row contributes 1, so a groupby sum of 'count' yields rows-per-hour
df['count'] = np.ones(df.shape[0])
@ssrosa
ssrosa / clean_up.py
Last active December 22, 2019 04:28
#Clean a raw stories export: drop the header row, index by Eastern-time
#datetimes, and coerce the remaining metric columns to int.
#NOTE(review): fragment — `df` comes from outside this excerpt and the final
#plotting code referenced by the last comment is cut off.
df.drop([0], axis = 0, inplace = True) #Drop column descriptors (first data row)
#Index the df with datetimes
df.set_index(pd.to_datetime(df['timestamp']), inplace = True)
#Reset time zone to US Eastern time
#(tz_convert requires the parsed timestamps to already be tz-aware)
df.index = df.index.tz_convert('US/Eastern')
#Drop unnecessary columns
df.drop(['timestamp', 'story_id'], axis = 1, inplace = True)
#Change remaining columns to int type
df = df.astype(int)
#Draw a plot of the data set
@ssrosa
ssrosa / find_corr.py
Created September 19, 2019 21:49
For a Medium post on correlation in Python
def find_corr(X, Y):
    """Return the Pearson correlation coefficient of X and Y.

    The covariance (computed by the sibling helper ``find_cov``) is
    normalized by the product of the two standard deviations.
    """
    return find_cov(X, Y) / (X.std() * Y.std())
@ssrosa
ssrosa / neg_cov.py
Created September 19, 2019 21:49
For a Medium post on correlation in Python
#Scatter plot illustrating negative covariance.
#x values: the integers 1 through 1000
X2 = np.arange(1, 1001)
#y values: the same range reversed, plus bounded random noise,
#so y falls as x rises
noise = np.random.randint(-200, 200, 1000) * np.random.random(1000)
Y2 = X2[::-1] + noise
plt.scatter(X2, Y2)
plt.title('Negative covariance')
@ssrosa
ssrosa / pos_cov.py
Created September 19, 2019 21:48
For a Medium post on correlation in Python
#Scatter plot illustrating positive covariance.
#x values: 1000 random integers in [1, 50)
X = np.random.randint(1, 50, 1000)
#y values: x plus bounded random noise, so y tracks x
jitter = np.random.randint(-15, 15, 1000) * np.random.random(1000)
Y = X + jitter
plt.scatter(X, Y)
plt.title('Positive covariance')
@ssrosa
ssrosa / find_cov.py
Created September 19, 2019 21:48
For a Medium post on correlation in Python
#Compute the covariance of two equal-length distributions.
#NOTE(review): incomplete fragment — the np.sum(...) expression is cut off here.
def find_cov(X, Y):
#Make sure both distributions have the same population size
#NOTE(review): `assert` is stripped under `python -O`; raising ValueError would be safer.
assert len(X) == len(Y), 'Distributions have different sizes.'
#Means of each distribution, used to center the values
muX = X.mean()
muY = Y.mean()
#Population size
n = len(X)
covariance = np.sum(