This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from nltk.corpus import stopwords | |
def find_org(text, acronym): | |
# good for abbrevs that are in all caps. | |
try: | |
text = text.split(acronym)[0] | |
except: | |
text = text | |
orig_text_token_list = text.split(" ") | |
text_token_list = [x.title() for x in orig_text_token_list] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import boto3 | |
import boto3.session | |
cred = boto3.Session().get_credentials() | |
ACCESS_KEY = cred.access_key | |
SECRET_KEY = cred.secret_key | |
SESSION_TOKEN = cred.token ## optional | |
s3client = boto3.client('s3', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Programmatically rewrite the metadata of the S3 objects named in html_list so
# that their links open in a browser tab instead of downloading when clicked.
client = boto3.client('s3', aws_access_key_id=ACCESS_KEY, aws_secret_access_key=SECRET_KEY)
for key in html_list:
    # NOTE(review): the response is not used; presumably this is an existence
    # check before the copy — confirm, or drop the extra request.
    client.head_object(Bucket=bucket_name, Key=key)
    # Copy each object onto itself with MetadataDirective='REPLACE' so the
    # new Content-Type is stored with the object.
    client.copy_object(
        Bucket=bucket_name,
        Key=key,
        # Source and destination are the same bucket; the dict form avoids
        # hand-building a 'bucket/key' string.
        CopySource={'Bucket': bucket_name, 'Key': key},
        ContentType='text/html',
        MetadataDirective='REPLACE',
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Open the document from the local docs folder and pull out its main XML part
# ('word/document.xml' inside the zip archive).
pathway = '/home/ec2-user/ec2docs/' + file
# The context manager closes the archive even if the read raises.
with zipfile.ZipFile(pathway) as document:
    xml_content = document.read('word/document.xml')
# NOTE(review): ZipFile.read returns bytes, so on Python 3 str() yields the
# "b'...'" repr rather than decoded text — confirm whether
# xml_content.decode('utf-8') is what is wanted here.
xml_str = str(xml_content)
# Build the link list for the doc by scanning the XML for links. Each match
# starts with '>http' and ends with '<', inclusive. A raw string keeps the
# pattern unchanged while avoiding the invalid '\<' string escape.
link_list = re.findall(r'>http.*?\<', xml_str)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Save our s3 word docs to the EC2 instance | |
bucket_name = 'name_of_s3_bucket' # replace with your bucket name | |
session = Session(aws_access_key_id=ACCESS_KEY, | |
aws_secret_access_key=SECRET_KEY) | |
s3 = session.resource('s3') | |
#for loop to loop through all files in the file_list and download them to the EC2 instance. | |
for word_doc in file_list: | |
#word_doc is the name of our file, as a string. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Create list of files to download to EC2 Instance | |
from boto3.session import Session | |
# Placeholder credential strings; substitute real values before use.
# NOTE(review): hard-coding keys in source is risky — consider letting boto3
# resolve credentials from the environment instead; confirm with the author.
ACCESS_KEY='your_access_key' | |
SECRET_KEY='your_secret_key' | |
# Build a boto3 Session from the explicit key pair, then obtain the S3
# resource from that session.
session = Session(aws_access_key_id=ACCESS_KEY, | |
aws_secret_access_key=SECRET_KEY) | |
s3 = session.resource('s3') | |
# Bucket handle; 'name_of_s3_bucket' looks like a placeholder name.
your_bucket = s3.Bucket('name_of_s3_bucket') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from statsmodels.tsa.stattools import adfuller | |
def test_stationarity(timeseries): | |
#Determine rolling statistics | |
rolmean = pd.rolling_mean(timeseries, window=96) | |
rolstd = pd.rolling_std(timeseries, window=96) | |
#I use 96 as my window because that is how many of my observations are contained in one 24-hr-period | |
#Plot rolling statistics: | |
fig = plt.figure(figsize=(12, 8)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bokeh.io import output_file, show | |
from bokeh.models import ( | |
GMapPlot, GMapOptions, ColumnDataSource, Circle, Range1d, PanTool, WheelZoomTool, BoxSelectTool, HoverTool, | |
) | |
from bokeh.plotting import figure | |
# Map options: roadmap-style map centred at lat 39.4699, lng -0.3763, zoom 13.
map_options = GMapOptions(lat=39.4699, lng=-0.3763, map_type="roadmap", zoom=13) | |
# Create the map plot with fresh, empty Range1d objects for both axes.
plot = GMapPlot(x_range=Range1d(), y_range=Range1d(), map_options=map_options) | |
# Title shown on the plot.
plot.title.text = "Valenbisi Stations" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#function to webscrape | |
driver = webdriver.Firefox() | |
wait = WebDriverWait(driver, 30) | |
coords = [] | |
driver.get('https://www.google.com/maps') | |
for school in schools: | |
searchbox = wait.until(EC.presence_of_element_located((By.ID, 'searchboxinput'))) | |
searchbox.clear() | |
searchbox.send_keys(school + ' school Washington DC') | |
driver.find_element_by_id('searchbox-searchbutton').click() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
from time import sleep | |
from selenium import webdriver | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary |
NewerOlder