Created
February 10, 2019 15:18
-
-
Save olihawkins/90f53659d9a9d22384a3c1dba05d5cf3 to your computer and use it in GitHub Desktop.
Python module for downloading planetary data for More or Less
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
This is the Python module I used for scraping planetary data for the BBC radio | |
programme More or Less. You can read more about it here: | |
http://olihawkins.com/2019/02/1 | |
""" | |
# Imports -------------------------------------------------------------------- | |
import datetime | |
import json | |
import os | |
import pandas | |
import requests | |
import time | |
from matplotlib import pyplot as plt | |
from matplotlib import rcParams | |
# Global plot styling: 'bmh' style sheet with a Helvetica Neue sans-serif font.
plt.style.use('bmh')
rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': ['Helvetica Neue'],
})
# Constants ------------------------------------------------------------------ | |
# Endpoint that request() POSTs the form data to.
URL = 'https://keisan.casio.com/exec/system/1224746378'

# request() sends a form-encoded body, so the content type must be the
# registered media type 'application/x-www-form-urlencoded' (the previous
# value 'application/x-www-form-url-encoded' is not a valid media type).
HEADERS = {'content-type': 'application/x-www-form-urlencoded'}

# Observer position posted with every request.
# NOTE(review): presumably decimal degrees near Greenwich -- confirm.
LONGITUDE = 0.0004
LATITUDE = 51.4777

# Posted as the 'var_H' and 'var_I' form fields respectively.
HOUR = 12
TIMEZONE = 0

# Initial "nearest so far" bound used by parse(): a planet is only recorded
# as nearest if its distance is below this value.
# NOTE(review): presumably in the same units as the scraped distances (the
# plot title says Astronomical Units) -- confirm.
VOYAGER_DISTANCE = 142.0

# Directory that fetch_years() writes its csv output into.
DATA_DIR = 'data'

# Seconds to pause between consecutive requests in fetch_years().
SLEEP_TIME = 1

# Column selection and order applied to the dataframe before it is saved.
COLS = [
    'Date',
    'Mercury',
    'Venus',
    'Mars',
    'Jupiter',
    'Saturn',
    'Uranus',
    'Neptune',
    'Pluto',
    'Nearest']
# Functions ------------------------------------------------------------------ | |
def request(date, timeout=30):
    """
    Send an http POST to the server requesting data for a given date.

    The longitude, latitude, hour and timezone sent with the request are
    module-level constants; only the day, month and year vary per call.

    Args:
        date: Object exposing ``day``, ``month`` and ``year`` attributes
            (for example a ``datetime.date``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one dead connection
            would stall an entire scraping run.

    Returns:
        The raw html text of the response.

    Raises:
        Exception: If the status code is not 200. The exception arguments
            are the status code and the response text.
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    # Keys are the form field names posted to URL; they must be kept
    # byte-for-byte, including the non-ASCII characters.
    data = {
        'var_λo': LONGITUDE,
        'var_φ': LATITUDE,
        'var_M': date.month,
        'var_D': date.day,
        'var_Y': date.year,
        'var_H': HOUR,
        'var_I': TIMEZONE
    }
    response = requests.post(
        URL, headers=HEADERS, data=data, timeout=timeout)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.text
def parse(date, text):
    """
    Parse the text of the response and return a dictionary of each planet's
    distance from the Earth, and the nearest planet to the Earth, on the
    given date.

    The data is a JSON array embedded in the page between the 'exedata' and
    'addTableClass' markers. The first element of the array is skipped; for
    every other element, item 0 is used as the planet name and item 3 as its
    distance.

    Args:
        date: Object with an ``isoformat()`` method; stored under 'Date'.
        text: Raw html returned by the server.

    Returns:
        A dict with a 'Date' key, one key per planet name mapping to its
        distance as a float, and a 'Nearest' key holding the name of the
        planet with the smallest distance below VOYAGER_DISTANCE (or None
        if no planet is nearer than that).

    Raises:
        ValueError: If either marker is missing from the text, or the text
            between the markers is not valid JSON.
    """
    marker = text.find('exedata')
    if marker == -1:
        # str.find signals "not found" with -1; slicing from that offset
        # would silently produce garbage rather than an error.
        raise ValueError("Cannot parse response: 'exedata' marker not found")
    # Skip the 7-character marker plus the 3 characters that follow it.
    payload = text[marker + 10:]

    tail = payload.find('addTableClass')
    if tail == -1:
        raise ValueError(
            "Cannot parse response: 'addTableClass' marker not found")
    # The JSON ends 2 characters before the closing marker.
    payload = payload[:tail - 2]

    planets_list = json.loads(payload)[1:]
    planets = {'Date': date.isoformat()}
    nearest_distance = VOYAGER_DISTANCE
    nearest_planet = None
    for planet in planets_list:
        name = planet[0]
        distance = float(planet[3])
        if distance < nearest_distance:
            nearest_distance = distance
            nearest_planet = name
        planets[name] = distance
    planets['Nearest'] = nearest_planet
    return planets
def fetch_date(date):
    """Download the page for a given date and return its parsed planet data."""
    return parse(date, request(date))
def fetch_years(years):
    """
    Get the data for each day in each of the given years and save it to a
    csv. The function returns the dataframe.

    The csv is written to DATA_DIR and named '<first year>-<last year>.csv'.
    One request is made per day, with a pause of SLEEP_TIME seconds after
    each one.

    Args:
        years: Non-empty sequence of calendar years (ints), in the order
            they should be fetched.

    Returns:
        A pandas DataFrame with one row per day and the columns in COLS.

    Raises:
        IndexError: If ``years`` is empty.
    """
    first_year, last_year = years[0], years[-1]
    filename = os.path.join(
        DATA_DIR, '{0}-{1}.csv'.format(first_year, last_year))

    # Create the output directory before scraping: discovering that it is
    # missing only when the csv is written would throw away the whole run.
    os.makedirs(DATA_DIR, exist_ok=True)

    rows = []
    for year in years:
        for date in get_dates(year):
            print('Getting data for {0} ...'.format(date.isoformat()))
            rows.append(fetch_date(date))
            time.sleep(SLEEP_TIME)

    df = pandas.DataFrame.from_dict(rows)
    df = df[COLS]
    df.to_csv(filename, index=False)
    return df
def get_dates(year):
    """Return every day of the given calendar year as a list of dates."""
    first_day = datetime.date(year, 1, 1)
    next_new_year = datetime.date(year + 1, 1, 1)
    day_count = (next_new_year - first_day).days
    return [
        first_day + datetime.timedelta(days=offset)
        for offset in range(day_count)
    ]
def plot_planets(data):
    """
    Plot the distances of Mercury, Venus and Mars from Earth and save the
    chart to plots/planets.svg.

    Args:
        data: Mapping-like object (for example a pandas DataFrame) with
            'Date', 'Mercury', 'Venus' and 'Mars' entries.

    Raises:
        KeyError: If any of the required entries is missing from ``data``.
    """
    date = data['Date']
    # Look up every column before creating the figure so that a missing
    # column fails without leaving an open figure behind.
    series = [
        ('Mercury', '#a19ba5', data['Mercury']),
        ('Venus', '#f44198', data['Venus']),
        ('Mars', '#0090ff', data['Mars']),
    ]

    plt.figure(figsize=(12, 4))
    try:
        for label, color, distances in series:
            plt.plot(
                date,
                distances,
                lw=1.2,
                ls='-',
                color=color,
                label=label)

        # NOTE(review): tick positions are hard-coded; presumably they are
        # row offsets into daily data that starts in 1968 -- confirm before
        # plotting any other date range.
        plt.xticks(
            [0, 3653, 7305, 10958, 14610, 18263],
            ['1968', '1978', '1988', '1998', '2008', '2018'])
        plt.ylim([0, 3])
        plt.legend(loc='upper right', facecolor='#ffffff', fontsize=11)
        plt.title(
            'Distance of Mercury, Venus and Mars from Earth in Astronomical Units',
            fontsize=13, fontweight='semibold')
        plt.tight_layout()

        # Reposition the title after the layout has been computed.
        plt.gca().title.set_position([0.5, 1.0])

        # savefig does not create missing directories, so make sure the
        # output directory exists first.
        out_dir = 'plots'
        os.makedirs(out_dir, exist_ok=True)
        plt.savefig(os.path.join(out_dir, 'planets.svg'), format='svg')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment