gr-a-m/get-slides.py

## get-slides.py
import hashlib
import requests
import sys
import time

def main():
    """Search SlideShare by keyword and print the raw response body.

    The keyword is every command-line argument joined by single spaces.
    The request is signed with the SHA-1 hex digest of the shared secret
    followed by the current Unix timestamp.

    The API key and shared secret default to the values bundled with this
    script; set ``SLIDESHARE_API_KEY`` / ``SLIDESHARE_SHARED_SECRET`` in the
    environment to use different credentials without editing the source.

    Raises:
        SystemExit: if no keyword was given on the command line.
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    import os  # local import: only this entry point reads the environment

    # Extract the keyword to search as the command-line argument
    keyword = ' '.join(sys.argv[1:])
    if not keyword.strip():
        # An empty query cannot produce useful results; fail fast instead of
        # sending a request that is guaranteed to be useless.
        sys.exit("usage: get-slides.py KEYWORD [KEYWORD ...]")

    # Set up the auth for the request
    api_key = os.environ.get('SLIDESHARE_API_KEY', 'UHk20Fg6')
    shared_secret = os.environ.get('SLIDESHARE_SHARED_SECRET', 'p4lNW7jg')
    timestamp = int(time.time())
    signature = hashlib.sha1((shared_secret + str(timestamp)).encode()).hexdigest()

    # Set the parameters to pass in the request
    params = {
        "api_key": api_key,
        "ts": timestamp,
        "hash": signature,
        "items_per_page": 50,
        "q": keyword,
        "sort": "mostviewed",
        "detailed": 1,
        "lang": "en",
        "what": "tag",
    }

    # Send the request; without a timeout a stalled connection would block
    # the script forever.
    r = requests.get(
        "https://www.slideshare.net/api/2/search_slideshows",
        params=params,
        timeout=30,
    )
    print(r.text)


if __name__ == '__main__':
    main()

## process-xml.py
import codecs
import os
import sys
import datetime
from xml.etree import ElementTree

# Convert a SlideShare search-result XML file (path given as the first
# command-line argument) into a CSV file written next to it, using the same
# base name with a ``.csv`` extension. One row is written per <Slideshow>.


def _quoted(value):
    """Return *value* as a double-quoted CSV field, doubling embedded quotes."""
    return u'"{}"'.format(value.replace(u'"', u'""'))


with codecs.open(sys.argv[1], encoding='utf-8') as f, \
        codecs.open(os.path.splitext(sys.argv[1])[0] + '.csv', 'w', encoding='utf-8') as w:
    tree = ElementTree.parse(f)
    w.write("Title,Author,Description,URL,Date,Views,Downloads,Comments,Favorites,Slides,Tags\n")
    for node in tree.iter('Slideshow'):
        # findtext() yields None/"" for missing or empty elements, so fall
        # back to an empty string instead of crashing on ``None.strip()``.
        title = (node.findtext('Title') or u"").strip()
        author = (node.findtext('Username') or u"").strip()
        url = (node.findtext('URL') or u"").strip()
        # Newlines are flattened so each slideshow stays on one CSV line.
        description = (node.findtext('Description') or u"").replace("\n", " ")
        date = datetime.datetime.strptime(node.find('Created').text.strip(), "%Y-%m-%d %H:%M:%S %Z")
        views = int(node.find('NumViews').text)
        downloads = int(node.find('NumDownloads').text)
        comments = int(node.find('NumComments').text)
        favorites = int(node.find('NumFavorites').text)
        slides = int(node.find('NumSlides').text)
        # Skip empty <Tag/> elements: their text is None and cannot be joined.
        tags = [tag.text for tag in node.iter('Tag') if tag.text is not None]
        tag_string = ','.join(tags)
        # Free-text columns are quoted with embedded quotes escaped so a '"'
        # in a title, author, description or tag cannot break the row.
        w.write(u"{},{},{},{},{},{},{},{},{},{},{}\n".format(
            _quoted(title), _quoted(author), _quoted(description), url,
            date.strftime("%m/%d/%Y"), views, downloads, comments,
            favorites, slides, _quoted(tag_string)))

## requirements.txt
requests
	import hashlib
	import requests
	import sys
	import time

	def main():
	    """Search SlideShare by keyword and print the raw response body.

	    The keyword is every command-line argument joined by single spaces.
	    The request is signed with the SHA-1 hex digest of the shared secret
	    followed by the current Unix timestamp.

	    The API key and shared secret default to the values bundled with this
	    script; set ``SLIDESHARE_API_KEY`` / ``SLIDESHARE_SHARED_SECRET`` in the
	    environment to use different credentials without editing the source.

	    Raises:
	        SystemExit: if no keyword was given on the command line.
	        requests.exceptions.RequestException: if the request fails or does
	            not complete within the timeout.
	    """
	    import os  # local import: only this entry point reads the environment

	    # Extract the keyword to search as the command-line argument
	    keyword = ' '.join(sys.argv[1:])
	    if not keyword.strip():
	        # An empty query cannot produce useful results; fail fast instead
	        # of sending a request that is guaranteed to be useless.
	        sys.exit("usage: get-slides.py KEYWORD [KEYWORD ...]")

	    # Set up the auth for the request
	    api_key = os.environ.get('SLIDESHARE_API_KEY', 'UHk20Fg6')
	    shared_secret = os.environ.get('SLIDESHARE_SHARED_SECRET', 'p4lNW7jg')
	    timestamp = int(time.time())
	    signature = hashlib.sha1((shared_secret + str(timestamp)).encode()).hexdigest()

	    # Set the parameters to pass in the request
	    params = {
	        "api_key": api_key,
	        "ts": timestamp,
	        "hash": signature,
	        "items_per_page": 50,
	        "q": keyword,
	        "sort": "mostviewed",
	        "detailed": 1,
	        "lang": "en",
	        "what": "tag",
	    }

	    # Send the request; without a timeout a stalled connection would block
	    # the script forever.
	    r = requests.get(
	        "https://www.slideshare.net/api/2/search_slideshows",
	        params=params,
	        timeout=30,
	    )
	    print(r.text)


	if __name__ == '__main__':
	    main()
	import codecs
	import os
	import sys
	import datetime
	from xml.etree import ElementTree

	# Convert a SlideShare search-result XML file (path given as the first
	# command-line argument) into a CSV file written next to it, using the same
	# base name with a ``.csv`` extension. One row is written per <Slideshow>.


	def _quoted(value):
	    """Return *value* as a double-quoted CSV field, doubling embedded quotes."""
	    return u'"{}"'.format(value.replace(u'"', u'""'))


	with codecs.open(sys.argv[1], encoding='utf-8') as f, \
	        codecs.open(os.path.splitext(sys.argv[1])[0] + '.csv', 'w', encoding='utf-8') as w:
	    tree = ElementTree.parse(f)
	    w.write("Title,Author,Description,URL,Date,Views,Downloads,Comments,Favorites,Slides,Tags\n")
	    for node in tree.iter('Slideshow'):
	        # findtext() yields None/"" for missing or empty elements, so fall
	        # back to an empty string instead of crashing on ``None.strip()``.
	        title = (node.findtext('Title') or u"").strip()
	        author = (node.findtext('Username') or u"").strip()
	        url = (node.findtext('URL') or u"").strip()
	        # Newlines are flattened so each slideshow stays on one CSV line.
	        description = (node.findtext('Description') or u"").replace("\n", " ")
	        date = datetime.datetime.strptime(node.find('Created').text.strip(), "%Y-%m-%d %H:%M:%S %Z")
	        views = int(node.find('NumViews').text)
	        downloads = int(node.find('NumDownloads').text)
	        comments = int(node.find('NumComments').text)
	        favorites = int(node.find('NumFavorites').text)
	        slides = int(node.find('NumSlides').text)
	        # Skip empty <Tag/> elements: their text is None and cannot be joined.
	        tags = [tag.text for tag in node.iter('Tag') if tag.text is not None]
	        tag_string = ','.join(tags)
	        # Free-text columns are quoted with embedded quotes escaped so a '"'
	        # in a title, author, description or tag cannot break the row.
	        w.write(u"{},{},{},{},{},{},{},{},{},{},{}\n".format(
	            _quoted(title), _quoted(author), _quoted(description), url,
	            date.strftime("%m/%d/%Y"), views, downloads, comments,
	            favorites, slides, _quoted(tag_string)))