Skip to content

Instantly share code, notes, and snippets.

@ktread
Last active July 27, 2019 19:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ktread/c6ab85f6697f2b7ccbd5bcfab08ad1a8 to your computer and use it in GitHub Desktop.
Save ktread/c6ab85f6697f2b7ccbd5bcfab08ad1a8 to your computer and use it in GitHub Desktop.
python for mountain scraping
# What is the longest/shortest/avg route
# One row per mountain; the same three statistics are computed for both
# the elevation-gain and the mileage columns.
mountain_route_insight = fin_routes.groupby('Mountain').agg(
    dict.fromkeys(('ElevationGain', 'RTMileage'), ['min', 'max', 'mean'])
)
{'Mt. Antero': (('07-27-2019 00:00:00', '56', '15', '15',
'Chance Showers And Thunderstorms'),
('07-27-2019 12:00:00', '42', '10', '20',
'Chance Showers And Thunderstorms then Partly Cloudy'),
('07-28-2019 00:00:00', '55', '15', '25',
'Sunny then Slight Chance Showers And Thunderstorms'),
('07-28-2019 12:00:00', '42', '15', '15',
'Chance Showers And Thunderstorms then Partly Cloudy'), ...}
# Forecast position (1-based) -> hour offset from today's midnight.
# Kept for backward compatibility; clean_weather computes the offset
# itself so it is no longer limited to ten forecast periods.
CONVERT = {1: 0, 2: 12, 3: 24, 4: 36, 5: 48, 6: 60, 7: 72, 8: 84, 9: 96, 10: 108}

# Groups: label ending in ':', temperature, wind low, optional wind high,
# the literal 'mph', and the free-text description.
_FORECAST_PATTERN = r"(.+:) (-?\d+)°F(\d+)-?(\d+)?(mph)(.+)"
_HOURS_PER_PERIOD = 12


def clean_weather(eachmtn):
    """Parse one mountain's scraped forecast strings into tuples.

    Args:
        eachmtn: Sequence whose first element is the mountain name and whose
            remaining elements are forecast strings such as
            ``'TonightLow: 42°F10-20mphChance Showers'``.

    Returns:
        A one-entry dict ``{name: (period, ...)}`` where each period is
        ``(timestamp, temperature, wind_low, wind_high, description)``, all
        strings. The timestamp is today's midnight plus 12 hours for every
        position after the first forecast, formatted ``%m-%d-%Y %H:%M:%S``.
        When the wind is a single value, ``wind_high`` repeats ``wind_low``.
        Returns an empty dict for empty input.
    """
    if not eachmtn:
        return {}

    # Resolve "today" once so every period shares the same base date.
    today = datetime.date.today()
    midnight = datetime.datetime(today.year, today.month, today.day)

    periods = []
    for position, item in enumerate(eachmtn[1:]):
        parsed = re.search(_FORECAST_PATTERN, item)
        if parsed is None:
            # Not a forecast entry. It still occupies its 12-hour slot, so
            # later entries keep the offset implied by their position.
            continue
        wind_low = parsed.group(3)
        wind_high = parsed.group(4) or wind_low
        offset = datetime.timedelta(hours=_HOURS_PER_PERIOD * position)
        stamp = (midnight + offset).strftime('%m-%d-%Y %H:%M:%S')
        periods.append(
            (stamp, parsed.group(2), wind_low, wind_high, parsed.group(6))
        )

    return {eachmtn[0]: tuple(periods)}
import seaborn as sns
import matplotlib
# `plt` is used below; importing the bare `matplotlib` package does not bind it.
import matplotlib.pyplot as plt

# Bar chart of the per-class median elevation gain, read from the
# 'ElevationGain' columns of the `routes_class` aggregate.
plt.rcParams['figure.figsize'] = (10, 4)
sns.set_style("white")
sns.set_context("paper", font_scale=1)
z = sns.barplot(x=routes_class.index, y="median",
                data=routes_class['ElevationGain'], color="dimgray")
# Tilt the class labels on the x axis by 30 degrees.
z.set_xticklabels(routes_class.index, rotation=30)
z.set(ylabel='Median Elevation Gain')
import re
import requests
from bs4 import BeautifulSoup
import datetime
# Fetch and parse the 14ers.com route index; the parsed `html` tree is what
# get_mountains() reads. The timeout makes a stalled connection raise
# instead of blocking forever (requests applies no timeout by default).
response = requests.get('https://14ers.com/routes.php', timeout=30)
html = BeautifulSoup(response.text, 'html.parser')
def get_mountains():
    """Return the stripped text of every table row in the parsed page.

    Reads the module-level ``html`` tree and collects the text of each
    ``<tr>`` element, leaving out rows whose text begins with ``'Routes'``.

    Returns:
        list[str]: one whitespace-stripped string per kept row.
    """
    # Extract each row's text once, then filter on it.
    row_texts = (row.get_text(strip=True) for row in html.find_all('tr'))
    return [text for text in row_texts if not text.startswith('Routes')]
def clean_mountain_data(mountain_data):
    """Split scraped mountain row strings into name and details.

    Each row is expected to look like ``<name>14,ddd'<rank><rest>``, where
    ``<rank>`` is a run of digits, a ``*``, or nothing.

    Args:
        mountain_data: Iterable of row values; each is converted with
            ``str()`` before matching.

    Returns:
        dict: ``{name: [elevation, rank, rest]}`` with ``elevation`` as an
        int (comma removed), ``rank`` as a string with any ``*`` removed
        (possibly empty), and ``rest`` as the remaining row text
        (presumably the range name — verify against the page).
        Rows that do not fit the pattern are skipped. When two rows share a
        name, the later one wins.
    """
    row_pattern = re.compile(r"(.+)(14,\d{3})(')(\d+|\*?)(.+)")
    mountains = {}
    for row in mountain_data:
        match = row_pattern.match(str(row))
        if match is None:
            # Header or otherwise non-matching row: nothing to extract.
            continue
        elevation = int(match.group(2).replace(',', ''))
        rank = match.group(4).replace('*', '')
        mountains[match.group(1)] = [elevation, rank, match.group(5)]
    return mountains
# How many mountains are in each class
# Is gain related to class
# One row per class: five summary statistics for the two numeric columns,
# plus the count of distinct values in the 'Mountain' column.
routes_class = fin_routes.groupby('Class').agg({
    **dict.fromkeys(('ElevationGain', 'RTMileage'),
                    ['min', 'max', 'median', 'mean', 'std']),
    'Mountain': ['nunique'],
})
import pandas as pd
import numpy as np
import seaborn as sns
import mountains
import matplotlib.pyplot as plt
# Use the 'ggplot' matplotlib style for every figure.
plt.style.use('ggplot')
# Set both pandas display limits to None so frames print without truncation.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Load the three datasets returned by the project's `mountains` module.
mountain_detail, routes, weather_data = mountains.call_data()
# Heat map of temperature per mountain (rows) and date (columns), limited to
# the forecast rows whose 'Hour' is 0.
plt.rcParams['figure.figsize'] = (10, 14)
am_weather = fin_weather[fin_weather['Hour'] == 0]
# pivot() arguments are keyword-only in pandas 2.0+, so name them explicitly.
wheatm = am_weather.pivot(index="Mountain", columns="Date", values="Temp")
# The diverging colour map is centred on 60; the empty xticklabels value
# suppresses the date labels on the x axis.
heat_map = sns.heatmap(wheatm, cmap="RdBu_r", center=60, xticklabels='')
# NOTE(review): the label says "Afternoon" while the filter keeps Hour == 0 —
# confirm which forecast period Hour 0 represents.
heat_map.set(xlabel="Afternoon Weather")
[['Mt. Antero', 'TodayHigh: 56°F15mphChance Showers And Thunderstorms',
'TonightLow: 42°F10-20mphChance Showers And Thunderstorms then Partly Cloudy',
'SundayHigh: 55°F15-25mphSunny then Slight Chance Showers And Thunderstorms', ....]]
def get_mountain_weather():
    """Download the forecast table cells for every mountain.

    Calls ``get_weather_urls()`` for a mapping of mountain name to forecast
    URL, fetches each page, and collects the stripped text of every ``<td>``
    inside tables with class ``forecastDays``.

    Returns:
        list[list[str]]: one list per mountain, holding the mountain name
        followed by its cell texts in document order.
    """
    all_weather = []
    for mtn_name, url in get_weather_urls().items():
        # Bound each request so one stalled page cannot hang the whole scrape.
        response = requests.get(url, timeout=30)
        page = BeautifulSoup(response.text, 'html.parser')
        cells = [
            cell.get_text(strip=True)
            for table in page.find_all('table', {'class': 'forecastDays'})
            for cell in table.find_all('td')
        ]
        all_weather.append([mtn_name, *cells])
    return all_weather
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment