Skip to content

Instantly share code, notes, and snippets.

@gr-a-m
Last active August 29, 2015 14:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gr-a-m/2c74dcbdbef8829a013b to your computer and use it in GitHub Desktop.
Save gr-a-m/2c74dcbdbef8829a013b to your computer and use it in GitHub Desktop.
Slideshare Post Retrieval and Processing
import hashlib
import requests
import sys
import time
def main():
    """Search SlideShare for the command-line keyword and print the raw response.

    All command-line arguments are joined with spaces to form the query, so
    ``get-slides.py machine learning`` searches for "machine learning".
    The response body is written to stdout unchanged so it can be redirected
    to a file.

    The API key and shared secret may be overridden with the
    ``SLIDESHARE_API_KEY`` and ``SLIDESHARE_SHARED_SECRET`` environment
    variables; the values previously embedded in this script remain the
    defaults.

    Raises:
        SystemExit: if no keyword was given on the command line.
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    import os

    # Extract the keyword to search from the command-line arguments.
    keyword = ' '.join(sys.argv[1:])
    if not keyword.strip():
        # Fail fast instead of sending an empty query to the API.
        sys.exit("usage: {} KEYWORD [KEYWORD ...]".format(sys.argv[0]))

    # Set up the auth for the request.
    # NOTE(review): credentials are committed in source; prefer supplying
    # them through the environment and rotating the embedded pair.
    api_key = os.environ.get('SLIDESHARE_API_KEY', 'UHk20Fg6')
    shared_secret = os.environ.get('SLIDESHARE_SHARED_SECRET', 'p4lNW7jg')
    timestamp = int(time.time())
    # The request hash is the SHA-1 of the shared secret followed by the
    # timestamp that is sent along as ``ts``.
    signature = hashlib.sha1(
        (shared_secret + str(timestamp)).encode()).hexdigest()

    # Set the parameters to pass in the request.
    params = {
        "api_key": api_key,
        "ts": timestamp,
        "hash": signature,
        "items_per_page": 50,
        "q": keyword,
        "sort": "mostviewed",
        "detailed": 1,
        "lang": "en",
        "what": "tag",
    }

    # Send the request. Without a timeout requests would wait forever on a
    # stalled connection.
    r = requests.get(
        "https://www.slideshare.net/api/2/search_slideshows",
        params=params,
        timeout=30,
    )
    # Do not let an HTTP error page be mistaken for search results when the
    # output is redirected to a file.
    r.raise_for_status()
    print(r.text)


if __name__ == '__main__':
    main()
import codecs
import os
import sys
import datetime
from xml.etree import ElementTree
def _field_text(node, tag):
    """Return the stripped text of child *tag* of *node*, or "" if absent/empty."""
    return (node.findtext(tag) or "").strip()


def convert_xml_to_csv(xml_path):
    """Write one CSV row per ``Slideshow`` element found in *xml_path*.

    The output file has the same name as the input with the extension
    replaced by ``.csv``. The XML is parsed before the output is opened, so
    a malformed input does not leave an empty CSV behind.

    Args:
        xml_path: path of the XML file to read.

    Returns:
        The path of the CSV file that was written.

    Raises:
        xml.etree.ElementTree.ParseError: if the input is not well-formed XML.
        ValueError: if a numeric field or the ``Created`` date cannot be parsed.
    """
    import csv

    csv_path = os.path.splitext(xml_path)[0] + '.csv'
    # Parsing from the path lets the parser honour the encoding declared in
    # the XML itself.
    tree = ElementTree.parse(xml_path)

    # newline='' is what the csv module expects of its output file;
    # lineterminator keeps the plain "\n" row endings.
    with open(csv_path, 'w', encoding='utf-8', newline='') as out:
        writer = csv.writer(out, lineterminator="\n")
        writer.writerow([
            "Title", "Author", "Description", "URL", "Date", "Views",
            "Downloads", "Comments", "Favorites", "Slides", "Tags",
        ])
        for node in tree.iter('Slideshow'):
            # Keep each record on a single physical line.
            description = (node.findtext('Description') or "").replace("\n", " ")
            created = datetime.datetime.strptime(
                _field_text(node, 'Created'), "%Y-%m-%d %H:%M:%S %Z")
            # Skip tags without text so the join cannot fail on None.
            tags = [tag.text for tag in node.iter('Tag') if tag.text is not None]
            # csv.writer quotes and escapes fields containing commas or
            # double quotes, which plain string formatting did not.
            writer.writerow([
                _field_text(node, 'Title'),
                _field_text(node, 'Username'),
                description,
                _field_text(node, 'URL'),
                created.strftime("%m/%d/%Y"),
                int(_field_text(node, 'NumViews')),
                int(_field_text(node, 'NumDownloads')),
                int(_field_text(node, 'NumComments')),
                int(_field_text(node, 'NumFavorites')),
                int(_field_text(node, 'NumSlides')),
                ','.join(tags),
            ])
    return csv_path


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit("usage: {} FILE.xml".format(sys.argv[0]))
    convert_xml_to_csv(sys.argv[1])
@gr-a-m
Copy link
Author

gr-a-m commented Jan 12, 2015

Example usage:

pip install -r requirements.txt
# Retrieve the top machine learning posts directly from the API
python get-slides.py machine learning > machine-learning.xml 
# Put the relevant data from the XML file generated in the previous command into machine-learning.csv
python process-xml.py machine-learning.xml

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment