# sagarnanduunc/Creating timeline of pandas data frame based on time granularity using Time Grouper

## Creating a timeline from a pandas DataFrame, bucketed by time granularity with a time grouper
# "field" is a custom name for the record-count column of the timeline DataFrame.
def createTimeLine(df, field, granularity):
    """Count the records of ``df`` per time bucket and return them as a timeline.

    The frame is indexed by its ``'postedTime'`` column (the original use case
    was Twitter data) and grouped with ``pd.Grouper(freq=granularity)``.
    Buckets that contain no records are kept and reported with a count of 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Input records; must contain a datetime ``'postedTime'`` column.
    field : str
        Name to give the count column of the returned frame.
    granularity : str
        pandas offset alias for the bucket size (e.g. ``'D'``, ``'W'``).

    Returns
    -------
    pandas.DataFrame
        One row per bucket with columns ``'day'`` (datetime; the bucket label
        formatted as ``%Y-%m-%d`` and parsed back, so it is truncated to the
        date) and ``field`` (number of records in the bucket).

    Raises
    ------
    KeyError
        If ``df`` has no ``'postedTime'`` column.

    Notes
    -----
    The result can be plotted with, e.g.,
    ``plt.plot(timeline["day"], timeline[field])``.
    """
    # Look the column up first so a missing column still fails loudly, even
    # when the frame is empty.
    if df["postedTime"].empty:
        # Nothing to group: return an empty frame with the usual columns and
        # dtypes instead of letting the grouper choke on a non-datetime index.
        empty = pd.DataFrame({"day": [], field: []})
        return empty.astype({"day": "datetime64[ns]", field: "int64"})

    # pd.Grouper(freq=...) replaces the deprecated/removed pd.TimeGrouper.
    counts = df.set_index("postedTime").groupby(pd.Grouper(freq=granularity)).size()

    # NOTE: labels are reduced to their date, so sub-daily granularities
    # produce several rows that share the same 'day' value.
    day_labels = counts.index.strftime("%Y-%m-%d")

    tweetTimeLine = pd.DataFrame({"day": day_labels, field: counts.values})
    tweetTimeLine["day"] = pd.to_datetime(tweetTimeLine["day"])
    return tweetTimeLine

    # For an interactive timeline, you can use plotly.
	# "field" is a custom name for the record-count column of the timeline DataFrame.
	def createTimeLine(df, field, granularity):
	    """Count the records of ``df`` per time bucket and return them as a timeline.

	    The frame is indexed by its ``'postedTime'`` column (the original use case
	    was Twitter data) and grouped with ``pd.Grouper(freq=granularity)``.
	    Buckets that contain no records are kept and reported with a count of 0.

	    Parameters
	    ----------
	    df : pandas.DataFrame
	        Input records; must contain a datetime ``'postedTime'`` column.
	    field : str
	        Name to give the count column of the returned frame.
	    granularity : str
	        pandas offset alias for the bucket size (e.g. ``'D'``, ``'W'``).

	    Returns
	    -------
	    pandas.DataFrame
	        One row per bucket with columns ``'day'`` (datetime; the bucket label
	        formatted as ``%Y-%m-%d`` and parsed back, so it is truncated to the
	        date) and ``field`` (number of records in the bucket).

	    Raises
	    ------
	    KeyError
	        If ``df`` has no ``'postedTime'`` column.

	    Notes
	    -----
	    The result can be plotted with, e.g.,
	    ``plt.plot(timeline["day"], timeline[field])``.
	    """
	    # Look the column up first so a missing column still fails loudly, even
	    # when the frame is empty.
	    if df["postedTime"].empty:
	        # Nothing to group: return an empty frame with the usual columns and
	        # dtypes instead of letting the grouper choke on a non-datetime index.
	        empty = pd.DataFrame({"day": [], field: []})
	        return empty.astype({"day": "datetime64[ns]", field: "int64"})

	    # pd.Grouper(freq=...) replaces the deprecated/removed pd.TimeGrouper.
	    counts = df.set_index("postedTime").groupby(pd.Grouper(freq=granularity)).size()

	    # NOTE: labels are reduced to their date, so sub-daily granularities
	    # produce several rows that share the same 'day' value.
	    day_labels = counts.index.strftime("%Y-%m-%d")

	    tweetTimeLine = pd.DataFrame({"day": day_labels, field: counts.values})
	    tweetTimeLine["day"] = pd.to_datetime(tweetTimeLine["day"])
	    return tweetTimeLine

	# For an interactive timeline, you can use plotly.