@Litwilly, created June 24, 2016 13:18
cURL Fitbit community group pages with pycurl, scrape the leaderboards with BeautifulSoup, and push the results to Redis (written to gather data for a Fitbit dashboard built on the dashing.io framework). A short sketch of reading the data back out of Redis follows the script.
#!/usr/bin/python
# Python 2 script: pycurl for the authenticated requests, BeautifulSoup for
# parsing, redis-py for storage.
from bs4 import BeautifulSoup
import pycurl
import re
from urllib import urlencode
from io import BytesIO
import redis
import time


class getFitbitData:
    cookieDir = './fbcookie.txt'  # where we're storing our cookies
    # Config for the Redis server we're connecting to
    pool = redis.ConnectionPool(host='0.0.0.0', port=6379, password='ifneeded', db=12)
    redisServer = redis.Redis(connection_pool=pool)
    pipe = redisServer.pipeline()
    # 2D array: group name used in the Redis key, and the group's Fitbit URL
    groups = [["XXXX", "https://www.fitbit.com/group/XXXXXX"],
              ["XXXX", "https://www.fitbit.com/group/XXXXXX"]]
    date = time.strftime("%Y-%m-%d")
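
    # Redis keys this script writes (summarised from the code below):
    #   all:steps                - sorted set, member name -> monthly step count, all groups
    #   <group>:steps            - sorted set, member name -> monthly step count, per group
    #   <group>:avg:<YYYY-MM-DD> - set holding that day's computed group average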
    def __init__(self):
        # Fitbit account credentials used for the login POST below
        self.password = 'yourpass'
        self.user = 'youruser'

    def getHTML(self, groupURL, page):
        print "authenticate"
        buffer = BytesIO()
        c = pycurl.Curl()
        c.setopt(c.SSL_VERIFYPEER, False)
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.TIMEOUT, 60)
        c.setopt(c.USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0')
        c.setopt(c.URL, 'https://www.fitbit.com/login')
        c.setopt(c.WRITEFUNCTION, buffer.write)
        c.perform()
        html = str(buffer.getvalue())
        # Pull the hidden form tokens out of the login page so they can be
        # echoed back with the login POST
        sourcepage = ''
        fp = ''
        if "_sourcePage" in html:
            rex = re.compile("input type=\"hidden\" name=\"_sourcePage\" value=\"(.*?)\"")
            sourcepage = rex.search(html).groups()[0]
        if "__fp" in html:
            rex = re.compile("input type=\"hidden\" name=\"__fp\" value=\"(.*?)\"")
            fp = rex.search(html).groups()[0]
        datastuff = {'login': 'Log In', 'disableThirdPartyLogin': 'false',
                     'email': self.user, 'password': self.password,
                     'rememberMe': 'true',
                     '_sourcePage': sourcepage, '__fp': fp}
        # Post the login form and keep the session cookies in the cookie jar
        c.setopt(c.USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0')
        c.setopt(c.URL, 'https://www.fitbit.com/login')
        c.setopt(c.COOKIEJAR, self.cookieDir)
        c.setopt(c.COOKIEFILE, self.cookieDir)
        c.setopt(c.WRITEFUNCTION, buffer.write)
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.POST, True)
        c.setopt(c.POSTFIELDS, urlencode(datastuff))
        c.perform()
        buffer.flush()
        # Fresh buffer for the page we actually want: the group's leaderboard
        # for the current month
        buffer = BytesIO()
        c.setopt(c.HTTPGET, True)  # switch back from POST to GET
        c.setopt(c.USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0')
        c.setopt(c.URL, groupURL + '/leaders?timeWindow=CURRENT_MONTH&page=' + page)
        c.setopt(c.COOKIEJAR, self.cookieDir)
        c.setopt(c.COOKIEFILE, self.cookieDir)
        c.setopt(c.WRITEFUNCTION, buffer.write)
        c.perform()
        html = str(buffer.getvalue())
        c.close()
        return html
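
    # parseHTML below assumes each leaderboard entry on the group page looks
    # roughly like the following (reconstructed from the selectors used; the
    # exact markup on fitbit.com may differ):
    #
    #   <div class="leaderboardCell left">
    #     <div class="info">
    #       <a href="...">Member Name</a>
    #       <li class="stat ellipsis">123,456 steps</li>
    #       <li class="average ellipsis">4,321 steps</li>
    #     </div>
    #   </div>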
    def parseHTML(self, html, group):
        soup = BeautifulSoup(html, "html.parser")
        count = 0
        # Walk every leaderboard cell and pull the member name, monthly step
        # total and daily average out of its "info" block
        for leftCell in soup.find_all("div", {"class": "leaderboardCell left"}):
            for mylink in leftCell.find_all("div", {"class": "info"}):
                for link in mylink.find_all("a"):
                    name = link.get_text()
                for link in mylink.find_all("li", {"class": "stat ellipsis"}):
                    t = link.get_text()
                    t = "".join(t.split())
                    t = t[:-5]  # drop the trailing text label
                    steps = int(t.replace(',', ''))
                for link in mylink.find_all("li", {"class": "average ellipsis"}):
                    a = link.get_text()[:-5]
                    avg = int(a.replace(',', ''))  # parsed but not stored below
                print name
                print steps
                # Legacy redis-py Redis.zadd() takes (key, value, score)
                self.redisServer.zadd("all:steps", name, steps)
                self.redisServer.zadd(group + ":steps", name, steps)
                count += 1
        return count
fit = getFitbitData()
# Run through the groups; a full page holds 25 listings, so keep fetching the
# next page until a page comes back with fewer than 25
for group in fit.groups:
    html = fit.getHTML(group[1], "0")
    listCount = fit.parseHTML(html, group[0])
    page = 1
    while listCount == 25:
        print "run again"
        html = fit.getHTML(group[1], str(page))
        listCount = fit.parseHTML(html, group[0])
        page += 1
    # Calculate the group average: pull all scores back out of Redis and
    # total every step count greater than 0
    s = 0
    d = 0
    z = fit.redisServer.zrange(group[0] + ':steps', 0, -1, withscores=True)
    for x in z:
        if x[1] > 0:
            s = s + x[1]
            d += 1
    # Store today's average; skip a group with no non-zero counts
    if d > 0:
        avg = s / d
        fit.redisServer.delete(group[0] + ":avg:" + fit.date)
        fit.redisServer.sadd(group[0] + ":avg:" + fit.date, avg)
print "main done"