# PRAW to interact with reddit
import praw
# Install TextBlob if not already installed: "pip install -U textblob"
from textblob import TextBlob
import nltk
# Download the VADER lexicon if not already downloaded
# nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Create an object for VADER sentiment scoring
sia = SentimentIntensityAnalyzer()
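# Illustrative example (the sample text is an assumption, not from the
# source): score one string with both VADER and TextBlob.
sample = "I love this subreddit, but the mods are slow to respond."
print(sia.polarity_scores(sample))          # VADER: 'neg', 'neu', 'pos', 'compound' scores
print(TextBlob(sample).sentiment.polarity)  # TextBlob: polarity in [-1.0, 1.0]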
import pandas as pd
# Recommended TensorFlow version is <= 2.1.0; otherwise the F1 score function breaks
import tensorflow as tf
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import tensorflow_datasets as tfds
from transformers import TFRobertaForSequenceClassification
from transformers import RobertaTokenizer
import os
# Load your dataset
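# Illustrative sketch (the file name and the 'text'/'label' column names are
# assumptions): read a labelled CSV and hold out a validation split before
# fine-tuning RoBERTa.
df = pd.read_csv('your_dataset.csv')  # hypothetical path
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['text'].tolist(), df['label'].tolist(), test_size=0.2, random_state=42)
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
model = TFRobertaForSequenceClassification.from_pretrained('roberta-base')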
reddit = praw.Reddit(client_id='client id',
                     client_secret='client secret',
                     user_agent='user agent')
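# Illustrative sketch (the subreddit name is an assumption): pull a few "hot"
# submissions into a DataFrame for later scoring.
posts = [(s.title, s.score, s.num_comments)
         for s in reddit.subreddit('learnpython').hot(limit=10)]
posts_df = pd.DataFrame(posts, columns=['title', 'score', 'num_comments'])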
from transformers import TFAutoModelForTokenClassification, AutoTokenizer
import tensorflow as tf
import praw
import pandas as pd
model = TFAutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
label_list = [
    "O",                 # Outside of a named entity
    "B-MISC", "I-MISC",  # Miscellaneous entity (beginning / inside)
    "B-PER", "I-PER",    # Person's name (beginning / inside)
    "B-ORG", "I-ORG",    # Organisation (beginning / inside)
    "B-LOC", "I-LOC",    # Location (beginning / inside)
]
from gensim.parsing.preprocessing import remove_stopwords
import gensim
from wordcloud import WordCloud
import numpy as np
import random
# Import gensim's built-in stopword list into the gensim_stopwords variable
# You can also manually add stopwords
gensim_stopwords = gensim.parsing.preprocessing.STOPWORDS
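# Illustrative sketch (the sample text is an assumption): strip stopwords,
# then render a word cloud from the cleaned text.
import matplotlib.pyplot as plt
cleaned = remove_stopwords("The quick brown fox jumps over the lazy dog")
wordcloud = WordCloud(stopwords=gensim_stopwords, background_color='white').generate(cleaned)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()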
import spacy
from matplotlib import cm
import matplotlib.pyplot as plt
nlp = spacy.load('en_core_web_sm')
ner_collection = {"Location":[],"Person":[],"Date":[],"Quantity":[],"Organisation":[]}
location = []
person = []
date = []
quantity = []
organisation = []
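# Illustrative sketch (the sample text is an assumption): run spaCy NER and
# bucket entities by type; spaCy's labels are GPE/PERSON/DATE/QUANTITY/ORG.
doc = nlp("Apple is looking at buying a U.K. startup for $1 billion on Monday.")
for ent in doc.ents:
    if ent.label_ == 'GPE':
        location.append(ent.text)
    elif ent.label_ == 'PERSON':
        person.append(ent.text)
    elif ent.label_ == 'DATE':
        date.append(ent.text)
    elif ent.label_ == 'QUANTITY':
        quantity.append(ent.text)
    elif ent.label_ == 'ORG':
        organisation.append(ent.text)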
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
# Load the train and test data
train_df = pd.read_csv('train.csv')
train_df['df_type'] = 'train'
test_df = pd.read_csv('test.csv')
test_df['df_type'] = 'test'
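# Illustrative sketch: stack train and test so later plots can compare the
# two splits via the 'df_type' flag.
combined_df = pd.concat([train_df, test_df], ignore_index=True)
sns.countplot(data=combined_df, x='df_type')
plt.show()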
# Import the matplotlib plotting and animation modules
import pandas as pd
from matplotlib import cm
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.path as path
import matplotlib.ticker as ticker
import matplotlib.animation as animation
# Print min, first quartile, median, third quartile, max and the 90th
# percentile using .quantile()
num_col = train_df.select_dtypes(include=np.number).columns
for i in num_col:
    print(f'Min: {train_df[i].quantile(0)} First Quartile: {train_df[i].quantile(0.25)} '
          f'Median: {train_df[i].quantile(0.5)} Third Quartile: {train_df[i].quantile(0.75)} '
          f'Max: {train_df[i].quantile(1)} 90th Percentile: {train_df[i].quantile(0.9)}')
# Percentile helper for use with .agg() on categorical group-bys
def percentile(n):
    def percentile_(x):
        return np.percentile(x, n)
    percentile_.__name__ = 'percentile_%s' % n
    return percentile_
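# Illustrative usage ('category_col' and 'value_col' are placeholder column
# names): quartiles of a numeric column within each category.
train_df.groupby('category_col')['value_col'].agg(
    [percentile(25), percentile(50), percentile(75)])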