Skip to content

Instantly share code, notes, and snippets.

View robotenique's full-sized avatar
🌜
Thinking 🤔

Juliano Garcia robotenique

🌜
Thinking 🤔
View GitHub Profile
@robotenique
robotenique / app.py
Last active October 31, 2018 14:27
How to: (1) parse a JSON snack object, (2) open the image and add it to the Model, (3) save the new data, and (4) iterate over and access the snacks and images from MongoDB
# Flask view that rebuilds the Snack collection in MongoDB from a local
# JSON file ("snacks/snacks.json"). Requires an authenticated session.
# NOTE(review): this gist excerpt is truncated — the body of the
# `for s in snacks` loop is not visible here, and the scrape has lost the
# original indentation.
@app.route("/display-snack")
@login_required
def display_snack():
# Select the working database; `mongo` and DATABASE are defined elsewhere.
my_database = mongo[DATABASE]
# Read raw bytes and undo unicode escapes before JSON-decoding.
# NOTE(review): the file handle is never closed — a `with open(...)` block
# would be safer; left untouched in this documentation-only pass.
my_file = open("snacks/snacks.json", "rb")
parsed = json.loads(my_file.read().decode('unicode-escape'))
snacks = parsed
# `s` is assigned here but immediately shadowed by the loop variable below.
s = snacks
# Wipe the existing collection before re-inserting (MongoEngine-style API).
Snack.objects.delete()
for s in snacks:
# Pandas cleanup of a labelled tweet dataset (Portuguese sentiment labels).
# `dataset`, `np` and `pd` are defined elsewhere in the notebook/script.
np.random.seed(42)
# Rename raw columns to snake_case and keep only the five we need.
df = (dataset
.rename(columns={"Created At": "publication_date",
"Text": "tweet",
"Retweet Count" : "num_retweets",
"Username": "username",
"Classificacao": "sentiment"})
.loc[:, ["publication_date", "tweet", "num_retweets", "username", "sentiment"]])
# Parse the publication timestamp into a proper datetime column.
df["publication_date"] = pd.to_datetime(df["publication_date"],infer_datetime_format=True)
# Map text labels to binary targets.
# NOTE(review): np.random.choice([0, 1]) is evaluated ONCE when this dict
# literal is built, so EVERY "Neutro" row maps to the same 0-or-1 value
# (deterministic under seed 42). If per-row randomization was intended,
# this needs a .map/.apply instead — confirm with the author.
df["sentiment"] = df["sentiment"].replace({"Negativo": 0, "Neutro": np.random.choice([0, 1]), "Positivo": 1})
@robotenique
robotenique / tweet_eval.py
Last active May 19, 2019 22:03
Gist for tweet analysis using fklearn
# Build a combined fklearn evaluator for the tweet-sentiment model:
# AUC, log-loss, precision and recall, all reading the same
# target/prediction columns.
# NOTE(review): excerpt is truncated — the evaluators list is never closed
# and the function body continues past this point; `time_column` and the
# imported temporal_split_evaluator are presumably used further down.
from fklearn.validation.evaluators import auc_evaluator, logloss_evaluator, precision_evaluator, recall_evaluator, \
combined_evaluators, temporal_split_evaluator
def tweet_eval(target_column, prediction_column, time_column):
# Shared kwargs so every metric reads the same two columns.
eval_args = dict(target_column=target_column, prediction_column=prediction_column)
basic_evaluator = combined_evaluators(evaluators=[
auc_evaluator(**eval_args),
logloss_evaluator(**eval_args),
precision_evaluator(**eval_args),
recall_evaluator(**eval_args)
# fklearn training pipeline: TF-IDF vectorization over the tweet text
# columns feeding a logistic-regression classifier, wrapped with a
# learner-time logger.
# NOTE(review): excerpt is truncated/mangled — the call is missing commas
# between keyword arguments and its closing parentheses. Also, the
# function's `vectorizer_params` / `logistic_params` parameters are
# shadowed by the hard-coded dicts below, so caller-supplied values are
# ignored. `nltk` must be imported (and its stopwords downloaded)
# elsewhere.
from fklearn.training.classification import nlp_logistic_classification_learner
from fklearn.training.pipeline import build_pipeline
from fklearn.training.utils import log_learner_time
def training_pipeline(text_cols, target_column, vectorizer_params, logistic_params):
return log_learner_time(
build_pipeline(
nlp_logistic_classification_learner(
text_feature_cols=text_cols,
target=target_column,
# Check docs here https://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfVectorizer.html
vectorizer_params = dict(decode_error="replace",
lowercase=True,
stop_words=nltk.corpus.stopwords.words("portuguese"),
ngram_range=(1, 3),
strip_accents=None)
# Check docs here https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html
logistic_params = dict(random_state=42,
n_jobs=-1,
solver="lbfgs")
# Duplicate of the tweet_eval snippet earlier in this scrape: a combined
# fklearn evaluator (AUC, log-loss, precision, recall) over shared
# target/prediction columns.
# NOTE(review): excerpt is truncated — the evaluators list is never closed;
# `time_column` and temporal_split_evaluator are presumably used in the
# part of the function not shown here.
from fklearn.validation.evaluators import auc_evaluator, logloss_evaluator, precision_evaluator, recall_evaluator, \
combined_evaluators, temporal_split_evaluator
def tweet_eval(target_column, prediction_column, time_column):
# Shared kwargs so every metric reads the same two columns.
eval_args = dict(target_column=target_column, prediction_column=prediction_column)
basic_evaluator = combined_evaluators(evaluators=[
auc_evaluator(**eval_args),
logloss_evaluator(**eval_args),
precision_evaluator(**eval_args),
recall_evaluator(**eval_args)
# fklearn space/time split: hold out both a time window and a fraction of
# "space" (here: usernames) to produce four sets — train, in-time/
# out-of-space, out-of-time/in-space, and out-of-time/out-of-space.
# NOTE(review): excerpt is truncated — the call's closing parenthesis (and
# apparently more arguments) are missing past `space_column`.
from fklearn.preprocessing.splitting import space_time_split_dataset
train_set, intime_outspace_hdout, outime_inspace_hdout, outime_outspace_hdout = \
space_time_split_dataset(df,
# Training period: 2016-12-31 → 2017-01-10; holdout runs to 2017-02-13.
train_start_date="2016-12-31",
train_end_date="2017-01-10",
holdout_end_date="2017-02-13",
# Fixed seed so the 20% user holdout is reproducible.
split_seed=42,
space_holdout_percentage=0.2,
space_column="username",
from fklearn.metrics.pd_extractors import *

# Log-column names produced by each evaluator for the "sentiment" target,
# plus a combined extractor that pulls all four metrics out of the
# validation logs in one pass.
_metric_names = ("auc", "logloss", "precision", "recall")
eval_auc_col, eval_logloss_col, eval_precision_col, eval_recall_col = (
    "{}_evaluator__sentiment".format(m) for m in _metric_names
)
base_extractor = combined_evaluator_extractor(
    base_extractors=[
        evaluator_extractor(evaluator_name="{}_evaluator__sentiment".format(m))
        for m in _metric_names
    ]
)