-
-
Save pohzipohzi/ad7942fc5545675022c1f31123e64c0c to your computer and use it in GitHub Desktop.
import csv
import datetime
import logging

import requests
from bs4 import BeautifulSoup
def setLogger(): | |
logging.basicConfig(level=logging.INFO, | |
format='%(asctime)s - %(levelname)s - %(message)s', | |
filename='logs_file', | |
filemode='w') | |
console = logging.StreamHandler() | |
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') | |
console.setFormatter(formatter) | |
logging.getLogger('').addHandler(console) | |
def getEconomicCalendar(startlink, endlink):
    """Scrape weekly economic-calendar pages from forexfactory.com.

    Starts at ``startlink`` (e.g. "calendar.php?week=jan7.2007"), follows the
    "next week" pagination link until ``endlink`` has been scraped, and prints
    one CSV row (datetime,currency,impact,event,actual,forecast,previous) per
    calendar entry to stdout.  Rows that fail to parse are appended to
    errors.csv instead of aborting the scrape.
    """
    baseURL = "https://www.forexfactory.com/"
    fields = ["date", "time", "currency", "impact", "event",
              "actual", "forecast", "previous"]
    link = startlink
    # Iterate instead of recursing: the 2007-2017 range is ~570 weekly pages,
    # uncomfortably close to Python's default recursion limit of 1000.
    while True:
        logging.info("Scraping data for link: {}".format(link))
        r = requests.get(baseURL + link)
        soup = BeautifulSoup(r.text, "lxml")

        # get and parse table data, ignoring details and graph
        table = soup.find("table", class_="calendar__table")
        if table is None:
            # e.g. blocked request or layout change -- bail out gracefully
            logging.warning("Table not found on the page. Exiting...")
            return

        # do not use the ".calendar__row--grey" css selector
        # (reserved for historical data)
        trs = table.select("tr.calendar__row.calendar_row")

        # some rows do not have a date/time (cells merged), so carry the
        # last-seen values forward
        curr_year = link[-4:]
        curr_date = ""
        curr_time = ""
        for tr in trs:
            # fields may mess up sometimes (see Tue Sep 25 2:45AM French
            # Consumer Spending); in that case record the offending
            # date/time in errors.csv and keep going
            try:
                for field in fields:
                    data = tr.select(
                        "td.calendar__cell.calendar__{}.{}".format(field, field))[0]
                    text = data.text.strip()
                    if field == "date" and text != "":
                        curr_date = text
                    elif field == "time" and text != "":
                        # time is sometimes "All Day" or "Day X"
                        # (eg. WEF Annual Meetings)
                        curr_time = "12:00am" if "Day" in text else text
                    elif field == "currency":
                        currency = text
                    elif field == "impact":
                        # when impact says "Non-Economic" on mouseover, the
                        # relevant class name is "Holiday", thus we read the
                        # span title rather than the class name
                        impact = data.find("span")["title"]
                    elif field == "event":
                        event = text
                    elif field == "actual":
                        actual = text
                    elif field == "forecast":
                        forecast = text
                    elif field == "previous":
                        previous = text
                dt = datetime.datetime.strptime(
                    ",".join([curr_year, curr_date, curr_time]),
                    "%Y,%a%b %d,%I:%M%p")
                print(",".join([str(dt), currency, impact, event,
                                actual, forecast, previous]))
            except Exception:
                # was a bare "except:"; keep best-effort behaviour but stop
                # swallowing SystemExit/KeyboardInterrupt
                with open("errors.csv", "a") as f:
                    csv.writer(f).writerow([curr_year, curr_date, curr_time])

        # exit when the last requested week has been scraped
        if link == endlink:
            logging.info("Successfully retrieved data")
            return

        # follow the link for the next week
        follow = soup.select(
            "a.calendar__pagination.calendar__pagination--next.next")
        link = follow[0]["href"]
if __name__ == "__main__":
    # Run via: python script_name.py >> output_name.csv
    setLogger()
    getEconomicCalendar("calendar.php?week=jan7.2007",
                        "calendar.php?week=dec24.2017")
from bs4 import BeautifulSoup from datetime import date, datetime from typing import List import urllib.request import urllib.parse import ssl import json from pytz import timezone
class PyEcoElement(object):
    """A single economic-calendar entry; all values are kept as the display
    strings scraped from the page."""

    def __init__(self, currency: str, event: str, impact: str, time_utc: str,
                 actual: str, forecast: str, previous: str):
        self.currency = currency    # e.g. "USD"
        self.event = event          # event title
        self.impact = impact        # e.g. "High"
        self.time_utc = time_utc    # formatted UTC timestamp
        self.actual = actual
        self.forecast = forecast
        self.previous = previous
class PyEcoRoot(object):
    """Container for one day's worth of PyEcoElement entries; exists mainly
    to give the JSON serializer a single root object."""

    def __init__(self, eco_elements: "List[PyEcoElement]"):
        self.eco_elements = eco_elements
class PyEcoCal:
    """Scraper for forexfactory.com's daily economic calendar."""

    def GetEconomicCalendar(self, query_date: datetime):
        """Return a JSON string of calendar events for ``query_date``.

        Times on the page are US/Eastern; they are converted to UTC in the
        returned payload.  Rows with no parsable time (e.g. "All Day")
        default to 12:00am Eastern.
        """
        base_url = "https://www.forexfactory.com/"
        # NOTE(review): disabling certificate verification process-wide is a
        # security hole; prefer a properly configured SSLContext.
        ssl._create_default_https_context = ssl._create_unverified_context

        # e.g. .../calendar.php?day=jan7.2007
        urleco = (f"{base_url}calendar.php?day="
                  f"{query_date.strftime('%b').lower()}{query_date.day}.{query_date.year}")
        date_string = query_date.strftime('%Y-%m-%d')

        # custom User-Agent: the default urllib agent gets blocked
        opener = urllib.request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        response = opener.open(urleco)
        result = response.read().decode('utf-8', errors='replace')

        soup = BeautifulSoup(result, "html.parser")
        rows = soup.find_all("tr", class_="calendar_row")

        eastern_tz = timezone('US/Eastern')
        eco_day = []
        for item in rows:
            row = {}  # renamed from ``dict`` -- do not shadow the builtin
            row["Currency"] = item.find_all(
                "td", {"class": "calendar__cell calendar__currency currency"})[0].text.strip()
            row["Event"] = item.find_all(
                "span", {"class": "calendar__event-title"})[0].text.strip()

            # Time cell is sometimes missing or non-clock text; fall back to
            # midnight Eastern, matching the original behaviour.
            try:
                time_eastern = item.find_all(
                    "td", {"class": "calendar__cell calendar__time time"})[0].div.text.strip()
                dt_eastern = datetime.strptime(f"{date_string} {time_eastern}",
                                               '%Y-%m-%d %I:%M%p')
            except (IndexError, AttributeError, ValueError):
                dt_eastern = datetime.strptime(f"{date_string} 12:00am",
                                               '%Y-%m-%d %I:%M%p')
            row["Time_UTC"] = eastern_tz.localize(dt_eastern).astimezone(
                timezone('utc')).strftime("%Y%m%dT%H:%M:%S %z")

            # Impact label lives in the icon's mouseover title.  Default to
            # "" so rows without an impact cell cannot raise KeyError later.
            row["Impact"] = ""
            for icon in item.find_all("td", {"class": "impact"}):
                spans = icon.find_all("span")
                if spans and spans[0].has_attr('title'):
                    row["Impact"] = spans[0]['title'].split(' ', 1)[0]

            # ``.text`` never returns None, so the original's "is not None"
            # branch was dead code; only the missing-cell case needs a guard.
            try:
                row["Actual"] = item.find_all(
                    "td", {"class": "calendar__cell calendar__actual actual"})[0].text.strip()
            except IndexError:
                row["Actual"] = ""
            try:
                row["Forecast"] = item.find_all(
                    "span", {"class": "calendar-forecast"})[0].text.strip()
            except IndexError:
                row["Forecast"] = ""
            try:
                row["Previous"] = item.find_all(
                    "span", {"class": "calendar-previous"})[0].text.strip()
            except IndexError:
                row["Previous"] = ""
            eco_day.append(row)

        events = [PyEcoElement(r["Currency"], r["Event"], r["Impact"],
                               r["Time_UTC"], r["Actual"], r["Forecast"],
                               r["Previous"])
                  for r in eco_day]
        eco_cal = PyEcoRoot(events)
        # Serialize the whole object graph by walking __dict__ attributes.
        return json.dumps(eco_cal.__dict__,
                          default=lambda o: o.__dict__, indent=3)
if name == "main": eco = PyEcoCal() json = eco.GetEconomicCalendar(datetime.today()) print(json)
This works, thanks buddy
PS C:\Users\Jasper> python -u "c:\Users\Jasper\Downloads\Lot Size Calculator\main.py"
2023-07-24 21:32:36,080 - INFO - Scraping data for link: calendar.php?week=jan7.2007
2023-07-24 21:32:36,219 - WARNING - Table not found on the page. Exiting...
Hey guys, check out this API, which provides access to all ForexFactory data. Unlock valuable insights and enhance your trading strategies efficiently.
Link to api: https://rapidapi.com/ousema.frikha/api/forex-factory-scraper1
And how can I fetch the events for the next week?