Skip to content

Instantly share code, notes, and snippets.

@kevinpaulconnor
Created June 6, 2017 19:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kevinpaulconnor/de0fec7733e1bdee951120005dcdcceb to your computer and use it in GitHub Desktop.
Save kevinpaulconnor/de0fec7733e1bdee951120005dcdcceb to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
""" Scrape WDRV looking for non-eagles songs with "easy" in the title """
import requests
import datetime
from bs4 import BeautifulSoup
import re
# Page that serves the station's music logs; every request is POSTed here.
base_url = 'http://wdrv.com/wdrv-music-logs/'

# Form fields for the POST. Starts empty; the 'musiclogtoview' field is set
# once per day inside the scrape loop.
payload = {}

# spoofing browser headers
# NOTE(review): the Cookie and User-Agent values look like they were copied
# from a real browser session -- confirm whether the site actually needs them.
headers = {
    'Pragma': 'no-cache',
    'Origin': 'http://wdrv.com',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-US,en;q=0.8',
    'Upgrade-Insecure-Requests': '1',
    'Content-Type': 'application/x-www-form-urlencoded',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Cache-Control': 'no-cache',
    'Referer': base_url,
    'Cookie': (
        'visited=true; '
        '__gads=ID=0313412959dd87a8:T=1496763619:S=ALNI_MboLkiRvVQSPS5S7TNz8fJlY-iBXg; '
        '_ga=GA1.2.2104241702.1496763620; '
        '_gid=GA1.2.664781839.1496763620'
    ),
    'Connection': 'keep-alive',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
}

# Day *before* the first log to fetch: the scrape loop adds one day to this
# value before it builds each request.
date = datetime.datetime(year=2012, month=12, day=31)

# Running tallies for the two Eagles titles, plus the dates they were seen on.
peaceful_count = take_it_count = 0
peaceful_dates, take_it_dates = [], []
# Scrape one music log per day, tally the two Eagles titles, and record every
# other "EASY" title in the output file 'wdrv'.

# Compiled once up front: matches a table cell whose text contains "EASY".
_easy_cell_re = re.compile(r'<td>.*EASY.*</td>')

with open('wdrv', 'w+') as f:
    # 1618 consecutive days. The date is advanced *before* each request, so
    # with the 2012-12-31 starting value the logs fetched run from
    # 2013-01-01 through 2017-06-06.
    for _ in range(1618):
        date += datetime.timedelta(days=1)
        # Progress indicator; the call form works on both Python 2 and 3.
        print(date)
        formatted_date = date.strftime('%Y-%m-%d')
        payload['musiclogtoview'] = date.strftime('%Y%m%d') + '-MusicLog.txt'

        # Without a timeout a single stalled connection would hang the whole
        # run; without the except a single network error would abort it
        # before the summary is written. Skip the day and carry on instead.
        try:
            r = requests.post(base_url, headers=headers, data=payload,
                              timeout=30)
        except requests.exceptions.RequestException as exc:
            print('%s: request failed, skipping (%s)' % (formatted_date, exc))
            continue

        # Compare by value: `is` tests object identity and only appeared to
        # work because CPython caches small integers.
        if r.status_code != 200:
            continue

        soup = BeautifulSoup(r.text, "html.parser")
        for node in soup.find_all('tr'):
            match = _easy_cell_re.findall(str(node))
            for item in match:
                if 'PEACEFUL EASY FEELING' in item:
                    peaceful_count += 1
                    peaceful_dates.append(formatted_date)
                elif 'TAKE IT EASY' in item:
                    take_it_count += 1
                    take_it_dates.append(formatted_date)
                else:
                    # Any other "EASY" title: log the date and the raw
                    # matched cell(s) for manual inspection.
                    f.write(formatted_date + '\n')
                    f.write(str(match) + '\n')

    # Summary. Each date list is written as "(d1,d2,...,)" -- the trailing
    # comma is kept from the original output format. The "Take It" list
    # previously lacked its opening parenthesis.
    f.write("Peaceful count: " + str(peaceful_count) + '\n')
    f.write('(' + ''.join(d + ',' for d in peaceful_dates) + ')\n')
    f.write("Take It count: " + str(take_it_count) + '\n')
    f.write('(' + ''.join(d + ',' for d in take_it_dates) + ')\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment