Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created January 11, 2021 06:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/db7ee46f65617cb84367d6c95309eb9c to your computer and use it in GitHub Desktop.
Save amankharwal/db7ee46f65617cb84367d6c95309eb9c to your computer and use it in GitHub Desktop.
# Sorting and feature engineering
#
# Orders the tweets by their 'date' column, then derives calendar features
# (year, month, day, day of year, quarter, season) on a separate copy so that
# `f_data` itself is only re-ordered, never given extra columns.
# NOTE(review): the sort runs on the raw 'date' column. If that column holds
# strings, the ordering is lexicographic and is only chronological for
# ISO-style formats -- confirm against the code that loads `f_data`.
f_data = f_data.sort_values(by='date')
ft_data = f_data.copy()

# Parse the timestamps once and reuse the result for every derived column,
# rather than rebuilding a DatetimeIndex from Python date objects per feature.
parsed_dates = pd.to_datetime(f_data['date'])

ft_data['date'] = parsed_dates.dt.date  # keep the calendar date, drop the time
ft_data['year'] = parsed_dates.dt.year
ft_data['month'] = parsed_dates.dt.month
ft_data['day'] = parsed_dates.dt.day
ft_data['day_of_year'] = parsed_dates.dt.dayofyear
ft_data['quarter'] = parsed_dates.dt.quarter

# Season code 1-4 from the month number:
# Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
ft_data['season'] = ft_data['month'] % 12 // 3 + 1
# Density plots used to choose cut-offs for the most negative and most
# positive tweets. Each panel draws the KDE of one sentiment score, shades the
# area to the right of the chosen cut-off, and marks the mean and the median.
neg_cutoff = 0.25  # scores above this are shaded as "most negative"
pos_cutoff = 0.4   # scores above this are shaded as "most positive"

# --- Top panel: negative sentiment -----------------------------------------
neg_scores = f_data['Negative Sentiment']
neg_mean = neg_scores.mean()
neg_median = neg_scores.median()

plt.subplot(2, 1, 1)
plt.title('Selecting A Cut-Off For Most Positive/Negative Tweets',
          fontsize=19, fontweight='bold')
# `bw` has been deprecated since seaborn 0.11; `bw_method` is the direct
# replacement and yields the same bandwidth.
ax0 = sns.kdeplot(neg_scores, bw_method=0.1)
# Pull the curve's coordinates back out of the axes so the tail can be shaded.
kde_x, kde_y = ax0.lines[0].get_data()
ax0.fill_between(kde_x, kde_y, where=(kde_x > neg_cutoff),
                 interpolate=True, color='b')
ax0.annotate('Cut-Off For Most Negative Tweets',
             xy=(neg_cutoff, 0.5), xytext=(0.4, 2),
             arrowprops=dict(facecolor='red', shrink=0.05),
             fontsize=16, fontweight='bold')
neg_mean_line = ax0.axvline(neg_mean, color='r', linestyle='--')
neg_median_line = ax0.axvline(neg_median, color='tab:orange', linestyle='-')
# Pair every label with its artist explicitly instead of relying on the
# order in which the artists happened to be added to the axes.
ax0.legend(
    [ax0.lines[0], neg_mean_line, neg_median_line],
    ['PDF',
     'Mean: {:.2f}'.format(neg_mean),
     'Median: {:.2f}'.format(neg_median)],
)

# --- Bottom panel: positive sentiment --------------------------------------
pos_scores = f_data['Positive Sentiment']
pos_mean = pos_scores.mean()
pos_median = pos_scores.median()

plt.subplot(2, 1, 2)
ax1 = sns.kdeplot(pos_scores, bw_method=0.1, color='green')
ax1.annotate('Cut-Off For Most Positive Tweets',
             xy=(pos_cutoff, 0.43), xytext=(0.4, 2),
             arrowprops=dict(facecolor='red', shrink=0.05),
             fontsize=16, fontweight='bold')
kde_x, kde_y = ax1.lines[0].get_data()
ax1.fill_between(kde_x, kde_y, where=(kde_x > pos_cutoff),
                 interpolate=True, color='green')
ax1.set_xlabel('Sentiment Strength', fontsize=18)
pos_mean_line = ax1.axvline(pos_mean, color='r', linestyle='--')
pos_median_line = ax1.axvline(pos_median, color='tab:orange', linestyle='-')
ax1.legend(
    [ax1.lines[0], pos_mean_line, pos_median_line],
    ['PDF',
     'Mean: {:.2f}'.format(pos_mean),
     'Median: {:.2f}'.format(pos_median)],
)

plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment