Skip to content

Instantly share code, notes, and snippets.

@arsenyinfo
Created March 5, 2016 20:47
Show Gist options
  • Save arsenyinfo/22d5ae4ad09733134c16 to your computer and use it in GitHub Desktop.
Save arsenyinfo/22d5ae4ad09733134c16 to your computer and use it in GitHub Desktop.
This script can be useful if you need to parse products from Product Hunt into a single table. It was tested with Python 3 but will probably work with Python 2.7 as well.
import logging
import os

import pandas as pd
import requests
# Log INFO and above, prefixed with a time-of-day stamp and the level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%H:%M:%S',
)
logger = logging.getLogger(__name__)

# Value sent in the Authorization header. The API token is taken from the
# PRODUCT_HUNT_TOKEN environment variable so it does not have to be written
# into this file; the original placeholder is kept as the fallback.
token = 'Bearer {}'.format(
    os.environ.get('PRODUCT_HUNT_TOKEN', 'INSERT_YOUR_TOKEN_HERE')
)
def parse(page):
    """Yield a trimmed-down dict for every post in *page*.

    Only the keys listed in ``fields`` are copied from each post; everything
    else in the post is dropped.

    Args:
        page: Iterable of post dicts (the script passes the ``posts`` entry
            of a decoded API response). ``None`` or an empty iterable
            yields nothing.

    Yields:
        dict: One dict per post, holding exactly the selected keys.

    Raises:
        KeyError: If a post lacks one of the selected keys.
    """
    fields = ('tagline', 'name', 'redirect_url', 'featured',
              'discussion_url', 'day', 'votes_count')
    # A response without a 'posts' entry hands us None; treat it as empty
    # instead of failing with "'NoneType' object is not iterable".
    for item in page or ():
        yield {k: item[k] for k in fields}
def get_index(page):
    """Return the smallest post ``id`` found in *page*, or 0 if there is none.

    The script feeds the result back as the ``older`` cursor of the next
    request and stops paging once it is falsy, so 0 doubles as the
    "nothing left" signal.

    Args:
        page: Iterable of post dicts, or ``None``.

    Returns:
        int: The minimum ``id`` among the posts that have one; 0 when the
        page is ``None``, empty, or contains no ids.
    """
    if not page:
        return 0
    # Skip posts without an id: None cannot be ordered against ints on
    # Python 3 and would make min() raise TypeError.
    post_ids = [x.get('id') for x in page]
    post_ids = [i for i in post_ids if i is not None]
    return min(post_ids) if post_ids else 0
# Page backwards through every post via the `older` cursor, collect the
# selected fields of each post, then dump everything into one CSV file.
with requests.Session() as s:
    s.headers['Authorization'] = token
    s.headers['Content-Type'] = 'application/json'
    s.headers['Accept'] = 'application/json'
    s.headers['Host'] = 'api.producthunt.com'

    # Initial cursor; presumably chosen to exceed every existing post id so
    # that the first request returns the newest posts -- TODO confirm.
    post_id = 1000000000
    data = []
    while post_id:
        url = 'https://api.producthunt.com/v1/posts/all?older={}'.format(post_id)
        logger.info('Fetching posts older than %s', post_id)
        # Bound the wait so a stalled connection cannot hang the run forever.
        response = s.get(url, timeout=30)
        # Surface HTTP failures (e.g. a rejected token) as an HTTPError here
        # rather than as an obscure TypeError further down.
        response.raise_for_status()
        # A body without a 'posts' key is treated as an empty page, which
        # ends the loop because get_index() then returns 0.
        page = response.json().get('posts') or []
        data.extend(parse(page))
        # The smallest id on this page becomes the cursor for the next one.
        post_id = get_index(page)

data = pd.DataFrame(data)
data.to_csv('product_hunt.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment