This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install: !pip install next_word_prediction
# curText must already be defined, e.g. curText = doc_set[1135957]
from next_word_prediction import GPT2

gpt2 = GPT2()

# Predict the next word after 'vegetarian': drop the '[MASK]' placeholder
# first, then query the model. The 5 is passed straight through as the second
# argument (presumably the number of candidates -- confirm against the library).
maskFreeText = curText.replace('[MASK]', '')
gpt2.predict_next(maskFreeText, 5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace with the correct words
from functools import reduce

# Each item of correctDict is a (wrong, right) pair that is unpacked into
# str.replace. Seeding reduce() with curText as the initial value gives the
# same fold as prepending it to the sequence, without building a throwaway list.
reduce(lambda text, pair: text.replace(*pair), correctDict.items(), curText)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install: !pip install pyspellchecker | |
from spellchecker import SpellChecker | |
spell = SpellChecker() | |
# Text from 122001 of the wikiText data; modified to include typos: | |
# 'commentary' -> 'commentyra', 'gimmick' -> 'gimimick'; | |
# curText = doc_set[122001] | |
# Use the spellchecker to identify and correct the typos | |
correctDict = {} | |
for val in re.split(r'[^\w]', curText): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# curText = doc_set[122001]
curText = '''
six months ago we thought it would be a fun idea to release our album on election day but this is not the election to be cute .
we felt as though rather than making a commentary we were only riding the wave of the election . this seemed less and less like what we
intended to do and more of a gimmick .
'''

## Remove the words 'day', 'we', 'is'
wordsToRemove = ['day', 'we', 'is']
# The \b anchors restrict matches to whole words; without them 'is' would also
# be cut out of 'this' and 'we' out of 'were'.
removePattern = r'\b(?:' + '|'.join(map(re.escape, wordsToRemove)) + r')\b'
# Each match becomes a space; split()/join() then collapses the whitespace runs
# (including the newlines of the literal) into single spaces.
cleanedText = ' '.join(re.sub(removePattern, ' ', curText).split())
print(cleanedText)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Install: !pip install textstat | |
import textstat

# curText must already be defined, e.g. curText = doc_set[27310]

# (1) Flesch readability score
print(textstat.flesch_reading_ease(curText))
# Output recorded by the author: 68.94 (indicating Standard)

# (2) Reading time, assuming 15 ms/character
print(textstat.reading_time(curText, ms_per_char=15))
# Output recorded by the author: 3.8 (3.8s to read)
# (3) Grade level: Intended for text written for children up to grade four |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import streamlit as st

st.title("Online Retail")

## !!! date options
# Each choice is a pre-formatted "'start' and 'end'" pair of quoted dates
# (presumably spliced into a SQL BETWEEN clause elsewhere -- confirm at the use site).
invoiceDateRanges = [
    "'2010-12-01' and '2011-01-01'",
    "'2010-12-01' and '2010-12-15'",
]
date = st.sidebar.selectbox("Select a Invoice Date Range", invoiceDateRanges)
## !!! product options |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Query with placeholders | |
timeSeries_query = f""" | |
SELECT | |
InvoiceDate, | |
Description, | |
ROUND(SUM(Quantity * UnitPrice), 2) AS Total_Sale_Amt | |
FROM | |
OnlineRetail | |
WHERE InvoiceDate BETWEEN {date} ## !!! Placeholder for date | |
AND Description IN ('{product}') ## !!! Placeholder for product |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Read in external SQL file
# The context manager closes the file handle as soon as the query text has
# been read, instead of leaving it open for the rest of the session.
with open(r'...\OnlineRetailPull.sql', 'r') as sqlFile:
    myQuery = sqlFile.read()

# Run the query text through the existing `engine` and load the result with pandas.
OnlineRetailData = pd.read_sql_query(myQuery, engine)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Query to pull data | |
timeSeries_query = """ | |
SELECT | |
InvoiceDate, | |
Description, | |
ROUND(SUM(Quantity * UnitPrice), 2) AS Total_Sale_Amt | |
FROM | |
OnlineRetail | |
WHERE InvoiceDate BETWEEN '2010-12-01' AND '2011-01-01' | |
AND Description IN ('CHOCOLATE HOT WATER BOTTLE', 'GREY HEART HOT WATER BOTTLE') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sqlalchemy as sa | |
import urllib | |
# `import urllib` alone does not guarantee that the `parse` submodule is
# loaded, so import it explicitly before use.
import urllib.parse

# URL-encode the ODBC connection string so it can be embedded in a connection URL.
params = urllib.parse.quote_plus(
    "DRIVER={SQL Server Native Client 11.0};"
    # The backslash is escaped explicitly: a bare "\S" is an invalid escape
    # sequence in a normal string literal and triggers a warning.
    "SERVER=localhost\\SQLEXPRESS;"
    "DATABASE=master;"
    "Trusted_Connection=yes"
)
## Connect using the specified parameters
# `params` is expected to hold the URL-encoded ODBC connection string; it is
# appended as the odbc_connect query value of the mssql+pyodbc URL.
engine = sa.create_engine(f"mssql+pyodbc:///?odbc_connect={params}")