ahhammoud / backtest.py
Created May 18, 2020 20:24
backtesting cases
from statsmodels.tsa.statespace.sarimax import SARIMAX
import numpy as np

# hold the last 14 days out of the fit so we can backtest against them
period = 14
# log-transform the series (df is the scraped daily case series) to stabilise the variance
df_log = np.log(df[:-period])
# weekly-seasonal SARIMA on the log series
model = SARIMAX(df_log,
                order=(1, 1, 2),
                seasonal_order=(1, 1, 2, 7),
                enforce_stationarity=False,
                enforce_invertibility=False)
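A hedged sketch of how the backtest could continue from here: fit the model, forecast the 14 held-out days, and compare against the actual values. The fitting call and the error metric below are my additions, not part of the original gist.
results = model.fit(disp=False)
# forecast the held-out window and undo the log transform
forecast = np.exp(results.get_forecast(steps=period).predicted_mean)
actual = np.asarray(df[-period:]).ravel()
# mean absolute percentage error over the backtest window
mape = np.mean(np.abs((actual - forecast.values) / actual))
print("MAPE over the last %i days: %.2f%%" % (period, 100 * mape))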
ahhammoud / full_analysis.py
Last active May 18, 2020 20:36
Full analysis code
# here we forecast 21 days into the future using the full dataset
from statsmodels.tsa.statespace.sarimax import SARIMAX
import numpy as np

# log-transform the full series and fit the same weekly-seasonal SARIMA as in the backtest
df_log = np.log(df)
model = SARIMAX(df_log,
                order=(1, 1, 2),
                seasonal_order=(1, 1, 2, 7),
                enforce_stationarity=False,
                enforce_invertibility=False)
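A hedged sketch of producing the 21-day forecast mentioned in the comment above, with the results mapped back out of log space; the variable names are mine, not from the original gist.
results = model.fit(disp=False)
pred = results.get_forecast(steps=21)
# point forecast and confidence interval on the original scale
forecast = np.exp(pred.predicted_mean)
conf_int = np.exp(pred.conf_int())
print(forecast.tail())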
ahhammoud / scrape_data.py
Last active May 18, 2020 18:55
scrape data from worldometers
# import the libraries used to download and parse the page
import requests
from bs4 import BeautifulSoup

# set the country you are interested in looking at
country = "uk"
# download the page that we're going to be using
page = requests.get("https://www.worldometers.info/coronavirus/country/" + country + "/")
# create the soup from the contents of the page
soup = BeautifulSoup(page.content, "html.parser")
#%%
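A hedged sketch, not from the gist, of one way to turn the downloaded page into the daily-cases dataframe that the forecasting snippets above work with. The assumption that the series sits in an embedded "data: [...]" JavaScript array is mine; the chart layout on worldometers may differ.
import re
import json
import pandas as pd
# join the text of all script tags and grab the first embedded data array
scripts = " ".join(s.get_text() for s in soup.find_all("script"))
match = re.search(r"data:\s*(\[[^\]]*\])", scripts)
if match:
    # treat missing days (null) as zero so the array is easy to work with
    daily_cases = json.loads(match.group(1).replace("null", "0"))
    df = pd.DataFrame(daily_cases, columns=["daily_cases"])
    print(df.tail())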
ahhammoud / Part2_Summary.py
Last active August 23, 2020 10:39
summary of everything explored in part 1 and part 2
# we first import the libraries we are going to be using
import requests
import pandas as pd
from bs4 import BeautifulSoup
import random
import time
import numpy as np
#%%
# this is the address of the first page in our search query
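A guess at how the truncated snippet above continues, based on the search URL used in Part2_ScrapeAllPages.py further down; treat the exact query string as an example rather than the original line.
url = "https://www.propertyfinder.ae/en/search?c=1&l=36&ob=pd&page=1&t=1"
page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")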
ahhammoud / Part2_NumberPages.py
Last active April 25, 2020 11:14
getting the number of pages to search through
# the CSS class of the element that shows the total number of results
number_class = "property-header__list-count property-header__list-count--new ge_resultsnumber text--size2 text--color1 text--normal"
# read the total number of listings out of that element
number_listings = int(soup.find(class_=number_class).get_text().replace(" results", ""))
# the number of listing cards shown on a single page
number_cards = len(cards)
# estimate how many pages we need to search through
number_pages = number_listings // number_cards + 1
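One observation of mine rather than something from the gist: the floor-divide-plus-one formula counts an extra page whenever the listings divide evenly into full cards, and a ceiling division avoids that edge case.
import math
# same page estimate without the extra page on exact division
number_pages = math.ceil(number_listings / number_cards)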
ahhammoud / Part2_SaveCSV.py
Last active April 25, 2020 11:18
save out the dataframe to a csv file
# import pandas and, optionally, os
import pandas as pd
import os
# optional: set the path to save out your csv file
os.chdir("path of the folder you want to save the file")
# construct the dataframe and pass details_list
data = pd.DataFrame(properties)
# assign the names of the columns
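The tail of this gist is cut off in the preview; it presumably mirrors the export_dataframe.py snippet below, naming the columns and writing the dataframe out.
data.columns = ["description", "building", "price", "property type", "bedrooms", "bathrooms", "area"]
data.to_csv("BusinessBay_data.csv")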
ahhammoud / Part2_ScrapePage.py
Last active August 23, 2020 10:37
summary of part 2 of the tutorial
# initialize an empty list to pass property details into
details_list = []
# define the name of the function and the variables going into it
def scrape_page(soup, details_list):
    cards = soup.find_all(class_="card__content")
    for card in cards:
# import the necessary libraries
import requests
from bs4 import BeautifulSoup
# download the page that will be scraped
page = requests.get("address of your page")
# parse the webpage using BeautifulSoup
soup = BeautifulSoup(page.content, "html.parser")
# collect all the listing cards on the page
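The body of scrape_page is cut off in the preview above; here is a hedged sketch of what it might do. Only the card__content class and the final field list (see export_dataframe.py below) come from the gists; the idea of collecting the text of the tags inside each card is my assumption.
def scrape_page(soup, details_list):
    # collect all the listing cards on the page
    cards = soup.find_all(class_="card__content")
    for card in cards:
        # gather the text of the tags inside the card (description, price, bedrooms, area, ...)
        details = [tag.get_text(strip=True) for tag in card.find_all(["h2", "p", "span"])]
        details_list.append(details)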
ahhammoud / export_dataframe.py
Created April 6, 2020 20:57
convert list to pandas dataframe, assign names to columns and export to csv
# with the data ready we now add it to a dataframe and save it out for further analysis
import pandas as pd
data = pd.DataFrame(properties)
data.columns = ["description", "building", "price", "property type", "bedrooms", "bathrooms", "area"]
data.to_csv("BusinessBay_data.csv")
ahhammoud / Part2_ScrapeAllPages.py
Last active April 25, 2020 11:15
for loop that scrapes through different pages
# we then use the page scraper to loop through every page in the search and pull the data out
details_list = []
# the number of pages in your search (an integer, e.g. number_pages from Part2_NumberPages.py)
pages = number_pages
# range's end point is exclusive, so go up to and including the last page
for page_number in range(1, pages + 1):
    url = "https://www.propertyfinder.ae/en/search?c=1&l=36&ob=pd&page=" + str(page_number) + "&t=1"
    page = requests.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    scrape_page(soup, details_list)
    print("finished page: %i" % page_number)