Skip to content

Instantly share code, notes, and snippets.

View layerWikiText.py
## Install: !pip install -U layer
import layer

# Fetch the 'wikitext-103-train' dataset from Layer and convert it to a pandas DataFrame.
df = layer.get_dataset('layer/wikitext/datasets/wikitext-103-train').to_pandas()
# Lower-cased copy of the `sentence` column as a plain Python list.
doc_set = list(df.sentence.str.lower())
View semantic.py
# Install: !pip install sentence_transformers
from sentence_transformers import SentenceTransformer
# Load the 'distilbert-base-nli-mean-tokens' sentence-embedding model.
model = SentenceTransformer('distilbert-base-nli-mean-tokens')
# sentences = [doc_set[9234], doc_set[9239], doc_set[1131966]]
# NOTE(review): `sentences` must be defined before the next line; the only
# definition in this snippet is the commented-out line above.
sentence_embeddings = model.encode(sentences)
# Cosine similarity of the first two embeddings.
# scipy's distance.cosine returns the cosine *distance*, so 1 - distance is the similarity.
from scipy.spatial import distance
print(1 - distance.cosine(sentence_embeddings[0], sentence_embeddings[1]))
View wordCloud.py
##### Generate the wordcloud #####
my_freq_grams = freq_grams
curMask = np.array(Image.open(pathToYourPic))
wc = WordCloud(background_color='white',
stopwords=stopwords,
width=800,
height=600,
relative_scaling=.6,
max_font_size=60,
View ngrams.py
from wordcloud import WordCloud, ImageColorGenerator
from nltk.corpus import stopwords
from nltk.util import ngrams
import nltk
def replace(match):
    """Return the ``swMapping`` entry for the text matched by *match*.

    The whole matched string (``match.group(0)``) is used as the lookup key.
    The signature fits a ``re.sub`` replacement callback; presumably it is
    used that way, but the call site is not visible here.

    Any lookup error raised by ``swMapping`` for an unknown key propagates
    to the caller.
    """
    return swMapping[match.group(0)]
## Define stopwords: NLTK's English list extended with 'unk'
curSW = [*stopwords.words('english'), 'unk']
View nextWordTops.py
## Top 5 next words
['dishes', 'and', ',', '-', 'recipes']
## Comparing with the original text
falafel has become popular among vegetarians and vegans ,
as an alternative to meat @-@ laden street foods ,
and is now sold in packaged mixes in health @-@ food stores .
falafel 's versatility has allowed for the reformulating of recipes for meatloaf ,
sloppy joes and spaghetti and meatballs into vegetarian [dishes] .
View nextWordPredictor.py
# Install: !pip install next_word_prediction
# curText = doc_set[1135957]
# NOTE(review): `curText` is only assigned in the commented-out line above;
# it must be defined before the prediction call below.
from next_word_prediction import GPT2
gpt2 = GPT2()
# Predict the next word after 'vegetarian':
# the '[MASK]' placeholder is stripped from the text before it is passed in.
# Presumably the second argument (5) is the number of candidates to return -- confirm against the package docs.
gpt2.predict_next(curText.replace('[MASK]', ''), 5)
View correct.py
# Replace with the correct words:
# starting from curText, apply str.replace once per (old, new) pair in correctDict.
from functools import reduce
reduce(lambda text, fix: text.replace(*fix), correctDict.items(), curText)
View spellchecker.py
# Install: !pip install pyspellchecker
from spellchecker import SpellChecker
spell = SpellChecker()
# Text from 122001 of the wikiText data; modified to include typos:
# 'commentary' -> 'commentyra', 'gimmick' -> 'gimimick';
# curText = doc_set[122001]
# Use the spellchecker to identify and correct the typos
correctDict = {}
for val in re.split(r'[^\w]', curText):
View re.py
import re
# curText = doc_set[122001]
curText = '''
six months ago we thought it would be a fun idea to release our album on election day but this is not the election to be cute .
we felt as though rather than making a commentary we were only riding the wave of the election . this seemed less and less like what we
intended to do and more of a gimmick .
'''
## Remove the words 'day', 'we', 'is'
# The \b anchors restrict each alternative to a whole word; without them the
# pattern also hits substrings and turns 'this' into 'th' and 'were' into 're'.
removed_words = ['day', 'we', 'is']
word_pattern = r'\b(?:' + '|'.join(map(re.escape, removed_words)) + r')\b'
# Each removed word is replaced by a space; split()/join then collapses the
# leftover runs of whitespace (including the newlines) into single spaces.
cleaned_text = ' '.join(re.sub(word_pattern, ' ', curText).split())
print(cleaned_text)
View textstat.py
### Install: !pip install textstat
import textstat
# curText = doc_set[27310]
# (1) Flesch readability score
print(textstat.flesch_reading_ease(curText))
68.94 ## indicating Standard
# (2) Reading time, assuming 15 ms/character
print(textstat.reading_time(curText, ms_per_char=15))
3.8 ## 3.8s to read
# (3) Grade level: Intended for text written for children up to grade four