Skip to content

Instantly share code, notes, and snippets.

@krysits
Created September 2, 2015 19:44
Show Gist options
  • Save krysits/5aa70e5e080f2b75a1f3 to your computer and use it in GitHub Desktop.
Save krysits/5aa70e5e080f2b75a1f3 to your computer and use it in GitHub Desktop.
Python script that queries the it-ebooks API for a search term and saves the matching book IDs to a text file.
#!/usr/bin/env python3
import urllib.request
import json
# Search term sent to the API; also used as the base name of the output file.
queryString = 'javascript'
# Base URL of the it-ebooks API (v1); endpoint paths are appended to it.
ur = 'http://it-ebooks-api.info/v1/'
def getFile(urla):
    """Fetch *urla* and return its body parsed as JSON.

    The response body is decoded as UTF-8 before being parsed.

    Args:
        urla: URL to request.

    Returns:
        The value produced by ``json.loads`` for the response body.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        ValueError: If the body is not valid UTF-8 or not valid JSON.
    """
    # ``read()`` is used instead of ``readall()``: the response objects
    # returned by ``urlopen`` on current Python 3 do not provide ``readall``.
    # The ``with`` block closes the response even if reading fails.
    with urllib.request.urlopen(urla) as response:
        payload = response.read()
    return json.loads(payload.decode('utf-8'))
def getBookByID(bookID):
    """Return the parsed API response for the book identified by *bookID*.

    Args:
        bookID: Identifier of the book; it is converted with ``str`` before
            being appended to the ``book/`` endpoint.

    Returns:
        Whatever ``getFile`` returns for the resulting URL.
    """
    endpoint = ''.join((ur, 'book/', str(bookID)))
    return getFile(endpoint)
def searchByKeyword(queri,pageNr=1):
    """Return one page of search results for *queri*.

    Args:
        queri: Search term. It is percent-encoded before being placed in the
            URL path, so terms containing spaces or reserved characters are
            sent as a single path segment.
        pageNr: Number of the result page to request (defaults to 1).

    Returns:
        Whatever ``getFile`` returns for the search URL.
    """
    # Imported locally so this function does not depend on extra
    # module-level imports.
    from urllib.parse import quote

    # ``safe=''`` also encodes '/', which would otherwise be interpreted as
    # a path separator and change the endpoint being requested.
    urla = ur + 'search/' + quote(queri, safe='') + '/page/' + str(pageNr)
    return getFile(urla)
def getAllBooks(queri):
    """Collect the IDs of every book matching *queri*, across all pages.

    Pages are requested one after another, starting at page 1, until the
    number of collected IDs reaches the ``"Total"`` reported by the first
    response or a page comes back without any books.

    Args:
        queri: Search term passed to ``searchByKeyword``.

    Returns:
        A list with the ``"ID"`` value of each book, in the order returned.

    Raises:
        KeyError: If the first response has no ``"Total"`` entry or a book
            entry has no ``"ID"``.
    """
    rezults = []
    page_nr = 1
    bodi = searchByKeyword(queri)
    totalBooks = int(bodi["Total"])
    while len(rezults) < totalBooks:
        books = bodi.get("Books") or []
        if not books:
            # The API delivered fewer books than it announced; stop here
            # instead of looping forever on an empty page.
            break
        rezults.extend(buuk["ID"] for buuk in books)
        if len(rezults) >= totalBooks:
            break
        # Always advance to the next page. The previous page-count check
        # skipped the last page and re-read the current one, which produced
        # duplicate IDs.
        page_nr += 1
        bodi = searchByKeyword(queri, page_nr)
    return rezults
# run main
# Fetch every book ID matching the configured search term and write them,
# one per line, to "<queryString>.txt" in the current directory.
booksByCategory = getAllBooks(queryString)
# ``with`` closes the file even if a write raises.
with open(queryString + ".txt", "w") as fo:
    for buukID in booksByCategory:
        fo.write(str(buukID) + "\n")
# Example of fetching the details of a single book by its ID:
#bodi = getBookByID('2279690981')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment