Last active
August 12, 2022 17:24
-
-
Save natyrix/50e08897a8b7f82164e24f668a74d84b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
from typing import Iterable

import altair as alt
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from textblob import TextBlob
from wordcloud import WordCloud

from pages.plots import *
# The database helper is only used by the non-deployed variant of this script,
# so its import stays disabled here.
# from add_data import db_execute_fetch #Comment this line for deployed version

# Page-wide Streamlit settings: browser tab title and full-width layout.
st.set_page_config(layout="wide", page_title="Dashboard")

# Module-level slot the page functions check for an already-read DataFrame
# before falling back to loadData().
loaded_df = None
def loadData():
    """Read the processed tweets CSV and remember it in the module-level ``loaded_df``.

    Returns:
        The DataFrame parsed from
        ``./st_dashboard/processed_global_data_tweets.csv``.
    """
    # Without this declaration the assignment below only binds a local name,
    # so callers that test ``loaded_df is None`` would re-read the file on
    # every call.
    global loaded_df
    # Database-backed variant (non-deployed version), kept for reference:
    # df = db_execute_fetch("select * from TweetInformation", dbName="tweets", rdf=True)
    df = pd.read_csv("./st_dashboard/processed_global_data_tweets.csv")  # For deployed version
    loaded_df = df
    return df
def selectHashTag():
    """Show a hashtag multiselect and display the tweets matching the selection.

    The options are the distinct values of the 'hashtags' column. With nothing
    selected the whole table is displayed.
    """
    df = loadData() if loaded_df is None else loaded_df
    hashTags = st.multiselect("choose combaniation of hashtags", list(df['hashtags'].unique()))
    if hashTags:
        # Filter on the column the options came from. The previous
        # whole-frame np.isin scan compared every cell of every column, which
        # could match unrelated columns and forced the entire frame into an
        # object array.
        df = df[df['hashtags'].isin(hashTags)]
    st.write(df)
def selectLocAndAuth():
    """Show location and language multiselects and display the matching tweets.

    Options come from the 'place' and 'lang' columns. A row is shown when its
    place is among the selected locations OR its language is among the
    selected languages; with nothing selected the whole table is displayed.
    """
    df = loadData() if loaded_df is None else loaded_df
    location = st.multiselect("choose Location of tweets", list(df['place'].unique()))
    # The non-deployed variant reads the 'language' column instead:
    # lang = st.multiselect("choose Language of tweets", list(df['language'].unique()))
    lang = st.multiselect("choose Language of tweets", list(df['lang'].unique()))  # For deployed version
    if location or lang:
        # Each selection is matched against its own column only; an empty
        # selection yields an all-False mask, so one expression covers the
        # location-only, language-only and combined cases without mutating
        # the list returned by the widget.
        # NOTE(review): combining with OR preserves the previous behaviour
        # (rows matching either filter); confirm whether AND was intended.
        matches = df['place'].isin(location) | df['lang'].isin(lang)
        df = df[matches]
    st.write(df)
def wordCloud():
    """Render a word cloud built from the 'full_text' column of the tweets."""
    tweets = loaded_df if loaded_df is not None else loadData()
    # Lower-case each tweet, collapse its whitespace to single spaces and
    # terminate it with one space so neighbouring tweets do not run together.
    pieces = [" ".join(str(entry).lower().split()) + " " for entry in tweets['full_text']]
    corpus = "".join(pieces)
    generator = WordCloud(width=650, height=450, background_color='white', min_font_size=5)
    cloud = generator.generate(corpus)
    st.title("Tweet Text Word Cloud")
    st.image(cloud.to_array())
def trainModelTest():
    """Let the user upload a CSV of tweets and report the saved model's test score.

    The upload must be a CSV (MIME type 'text/csv') with a 'full_text' column.
    Commas in the text are replaced by spaces, each text gets a polarity from
    getPolarity, neutral rows are dropped and the rest receive a binary
    'scoremap' label (1 = positive, 0 = negative). The pickled model is then
    scored on the pickled X_test / y_test pair and the rounded score is
    written to the page.

    Any exception raised while processing is printed and shown on the page
    instead of propagating.
    """
    st.markdown("<p style='padding:10px; background-color:#000320;color:#00ECB9;font-size:16px;border-radius:10px;'>Your CSV file should have 'full_text' as a column</p>", unsafe_allow_html=True)
    uploaded_file = st.file_uploader("Choose a CSV file")
    if uploaded_file is None:
        return

    def applyConvert(val):
        """Map a polarity value to 'negative', 'neutral' or 'positive'."""
        if val < 0:
            return "negative"
        elif val == 0:
            return "neutral"
        else:
            return "positive"

    try:
        print(uploaded_file.type)
        if uploaded_file.type != 'text/csv':
            st.write("Only .csv file is allowed")
            return

        df = pd.read_csv(uploaded_file)
        # Rows without text cannot be scored; str() keeps numeric cells from
        # crashing on .replace.
        df = df.dropna(subset=['full_text']).copy()
        df['full_text'] = df['full_text'].apply(
            lambda text: str(text).replace(',', ' ')
        )
        df['polarity'] = getPolarity(df['full_text'])

        # Explicit copies so the column assignments below write to an
        # independent frame rather than to a slice of another one.
        processed_df = df[["full_text", "polarity"]].copy()
        processed_df['score'] = processed_df['polarity'].apply(applyConvert)
        processed_df = processed_df[processed_df['score'] != "neutral"].copy()
        # The previous `1 if "positive" else 0` tested the truthiness of a
        # string literal and therefore labelled every row 1.
        processed_df['scoremap'] = processed_df['score'].map(
            lambda val: 1 if val == "positive" else 0
        )

        # NOTE(review): these n-gram features and labels are built but never
        # passed to the model; the score reported below is computed on the
        # pickled X_test / y_test, not on the uploaded rows. Presumably the
        # model needs the vectorizer it was trained with - confirm against
        # the training code before wiring this in.
        (X, y) = processed_df['full_text'], processed_df['scoremap']
        trigram_vectorizer = CountVectorizer(ngram_range=(1, 3))
        trigram_vectorizer.fit(X.values)
        X_trigram_vectorizer = trigram_vectorizer.transform(X.values)

        # pickle.load can execute arbitrary code from the file it reads. These
        # are fixed paths inside the app directory; never load user uploads
        # this way.
        try:
            with open('./st_dashboard/model.pkl', 'rb') as f:
                loaded_model = pickle.load(f)
        except FileNotFoundError:
            # Makes the "no model" message reachable when the file is absent.
            loaded_model = None
        if loaded_model is None:
            st.write("No model found to test.")
            return

        with open("./st_dashboard/X_test.pkl", 'rb') as f:
            loaded_X_test = pickle.load(f)
        with open("./st_dashboard/y_test.pkl", 'rb') as f:
            loaded_y_test = pickle.load(f)

        test_score = loaded_model.score(loaded_X_test, loaded_y_test)
        st.write(f'Test score: {round(test_score, 2)}')
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        # the page.
        msg = str(e)
        print(msg)
        st.write(msg)
def getPolarity(text: Iterable[str]) -> list:
    """Return the TextBlob sentiment polarity of every entry in *text*.

    Args:
        text: The texts to score, one per element (for example a DataFrame
            column). A single string would be iterated character by
            character, so pass a collection. Each entry is converted with
            str() before it is handed to TextBlob.

    Returns:
        A list with one polarity value per entry, in input order.
    """
    return [TextBlob(str(entry)).sentiment.polarity for entry in text]
def findFullText(df: pd.DataFrame):
    """Return the 'full_text' values of *df* with commas replaced by spaces.

    Args:
        df: DataFrame that has a 'full_text' column.

    Returns:
        A list of strings, one per row, in row order.

    Raises:
        KeyError: If *df* has no 'full_text' column.
    """
    # Iterating a DataFrame yields column names and iteritems() yields
    # (name, Series) pairs, so indexing those with 'full_text' raised
    # TypeError; read the column directly instead. str() tolerates
    # non-string cells.
    return [str(entry).replace(',', ' ') for entry in df['full_text']]
def mainPage():
    """Render the main page: data tables, word cloud and the model-test expander."""
    section_break_html = "<p style='padding:10px; background-color:#000000;color:#00ECB9;font-size:16px;border-radius:10px;'>Section Break</p>"

    st.title("Data Display")
    selectHashTag()
    st.markdown(section_break_html, unsafe_allow_html=True)
    selectLocAndAuth()
    wordCloud()

    # The chart calls that used to sit here in a "Show More Graphs" expander
    # are the same ones plots() makes for the "Data Visualizations" page.
    model_section = st.expander("Test the trained model with a csv file")
    with model_section:
        trainModelTest()
def plots():
    """Render the "Data Visualizations" page: sidebar heading, title and every chart."""
    st.sidebar.markdown("# Data Visualizations ❄️")
    st.title("Data Visualizations")
    # Chart renderers, drawn top to bottom in this order.
    chart_renderers = (
        locationPie,
        userMentionbarChart,
        sourcePie,
        stBarChart,
        sentimentPie,  # Only For deployed version
        langPie,
    )
    for render_chart in chart_renderers:
        render_chart()
# Sidebar label -> function that renders the corresponding page.
page_names_to_funcs = {"Main Page": mainPage, "Data Visualizations": plots}

# Let the user pick a page in the sidebar, then render it.
selected_page = st.sidebar.selectbox("Select a page", list(page_names_to_funcs))
render_selected_page = page_names_to_funcs[selected_page]
render_selected_page()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment