Skip to content

Instantly share code, notes, and snippets.

# Install: !pip install pyspellchecker
from spellchecker import SpellChecker
spell = SpellChecker()
# Text from 122001 of the wikiText data; modified to include typos:
# 'commentary' -> 'commentyra', 'gimmick' -> 'gimimick';
# curText = doc_set[122001]
# Use the spellchecker to identify and correct the typos
correctDict = {}
for val in re.split(r'[^\w]', curText):
# Replace with the correct words: every key of correctDict found in curText
# is swapped for its mapped value, one pair after another. The resulting
# string is the value of the expression; it is not assigned to a name.
from functools import reduce
reduce(
    lambda text, fix: text.replace(fix[0], fix[1]),
    correctDict.items(),
    curText,
)
# Install: !pip install next_word_prediction
# curText = doc_set[1135957]
from next_word_prediction import GPT2
gpt2 = GPT2()
# Predict the next word after 'vegetarian'
# The '[MASK]' marker is removed first so the model sees the text without it.
prompt = curText.replace('[MASK]', '')
gpt2.predict_next(prompt, 5)
## Top 5 next words
['dishes', 'and', ',', '-', 'recipes']
## Comparing with the original text
falafel has become popular among vegetarians and vegans ,
as an alternative to meat @-@ laden street foods ,
and is now sold in packaged mixes in health @-@ food stores .
falafel 's versatility has allowed for the reformulating of recipes for meatloaf ,
sloppy joes and spaghetti and meatballs into vegetarian [dishes] .
from wordcloud import WordCloud, ImageColorGenerator
from nltk.corpus import stopwords
from nltk.util import ngrams
import nltk
def replace(match):
    """Return the ``swMapping`` entry for the text matched by *match*.

    The one-argument signature is the one ``re.sub`` requires of a callable
    replacement (presumably how this is used; confirm against the caller).

    Args:
        match: A match object; only ``match.group(0)``, the whole matched
            text, is read.

    Returns:
        ``swMapping[match.group(0)]``. ``swMapping`` is a module-level
        mapping defined outside this block.

    Raises:
        KeyError: If the matched text is not a key of ``swMapping``
            (assuming ``swMapping`` is a dict; confirm at its definition).
    """
    return swMapping[match.group(0)]
## Define stopwords: NLTK's English list with the extra token 'unk' appended
curSW = [*stopwords.words('english'), 'unk']
import re
# curText = doc_set[122001]
curText = '''
six months ago we thought it would be a fun idea to release our album on election day but this is not the election to be cute .
we felt as though rather than making a commentary we were only riding the wave of the election . this seemed less and less like what we
intended to do and more of a gimmick .
'''
## Replace the words 'day', 'we', 'is'
wordsToDrop = ['day', 'we', 'is']
# The \b anchors restrict matches to whole words, so 'this' and 'were' stay
# intact instead of losing their embedded 'is' / 'we'.
dropPattern = re.compile(r'\b(?:' + '|'.join(map(re.escape, wordsToDrop)) + r')\b')
# split() + join collapses the newlines and the gaps left by removed words
# into single spaces.
cleanedText = ' '.join(dropPattern.sub(' ', curText).split())
print(cleanedText)
@YiLi225
YiLi225 / pythonSQL.py
Last active April 5, 2022 15:04
Query to pull data into Python
## Query to pull data
timeSeries_query = """
SELECT
InvoiceDate,
Description,
ROUND(SUM(Quantity * UnitPrice), 2) AS Total_Sale_Amt
FROM
OnlineRetail
WHERE InvoiceDate BETWEEN '2010-12-01' AND '2011-01-01'
AND Description IN ('CHOCOLATE HOT WATER BOTTLE', 'GREY HEART HOT WATER BOTTLE')
import streamlit as st
st.title("Online Retail")
## !!! date options
# Each option is a ready-made pair of quoted dates joined by "and"; the
# sidebar selection is stored in `date`.
date = st.sidebar.selectbox(
    label="Select a Invoice Date Range",
    options=[
        "'2010-12-01' and '2011-01-01'",
        "'2010-12-01' and '2010-12-15'",
    ],
)
## !!! product options
## Query with placeholders
timeSeries_query = f"""
SELECT
InvoiceDate,
Description,
ROUND(SUM(Quantity * UnitPrice), 2) AS Total_Sale_Amt
FROM
OnlineRetail
WHERE InvoiceDate BETWEEN {date} ## !!! Placeholder for date
AND Description IN ('{product}') ## !!! Placeholder for product
## Read in external SQL file
# The context manager guarantees the file handle is closed as soon as the
# query text has been read, even if read() raises.
with open(r'...\OnlineRetailPull.sql', 'r') as sqlFile:
    myQuery = sqlFile.read()
# Execute the loaded query through `engine` (defined outside this snippet)
# and keep the result that pandas returns.
OnlineRetailData = pd.read_sql_query(myQuery, engine)