Skip to content

Instantly share code, notes, and snippets.

@CodeSigils
Last active January 31, 2022 12:56
Show Gist options
  • Save CodeSigils/f0eb82b0da00ff6f9737e38dfe2256ac to your computer and use it in GitHub Desktop.
Save CodeSigils/f0eb82b0da00ff6f9737e38dfe2256ac to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# coding: utf-8
# #### Step 1 - Imports
# In[4]:
import requests
import pandas as pd
import xlsxwriter
# #### Step 2 - Requests & CURL
# - The request was copied from the browser dev tools via the right-click "Copy as cURL (bash)" menu and converted to Python with the https://curlconverter.com/#python online tool
# In[11]:
# curl command from developer tools converted from: https://curlconverter.com/#python
# Request headers reproduced from the browser request captured in dev tools.
# NOTE(review): the '^\\^' sequences in the sec-ch-ua* values look like
# Windows-cmd quote escaping left over from the copied curl command, and the
# sec-ch-ua value appears to be cut off after "Google" - confirm against the
# original browser request before relying on these headers.
headers = {
    'sec-ch-ua': '^\\^Chromium^\\^;v=^\\^94^\\^, ^\\^Google',
    'Referer': 'https://www.ebooks.com/en-us/subjects/computers/',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4590.0 Safari/537.36',
    'sec-ch-ua-platform': '^\\^Windows^\\^',
    'Content-Type': 'application/json',
}
# Query-string parameters, kept as a tuple of (name, value) pairs because the
# script later indexes into it positionally (params[2][1]).
params = (
    ('subjectId', '13'),
    ('pageNumber', '1'),
    ('countryCode', 'US'),
)
# This comes from the CURL converter: https://curlconverter.com/#python
# A timeout is passed so that an unresponsive server raises
# requests.exceptions.Timeout instead of blocking the script indefinitely.
response = requests.get(
    'https://www.ebooks.com/api/search/subject/',
    headers=headers,
    params=params,
    timeout=30,
)
# #### Step 3 - Check Status Code
# In[7]:
# Inspect the response object itself (its repr shows the HTTP status code).
response
# Value of the third query parameter pair sent with the request.
params[2][1]
# #### Step 4 - Create Json Object
# Decode the response body once and reuse the decoded object below.
payload = response.json()
payload
type(payload)
# #### Step 5 - Output Keys
payload.keys()
# #### Step 6 - Find your Data
# - Data points to collect for every entry under the 'books' key:
#     title, subtitle, author, publisher, publication year, price
payload['books']
# - Keep the list of book records and check how many were returned:
results_json = payload['books']
len(results_json)
# - Look at the first record and at each field of interest on it:
first_book = results_json[0]
first_book
# thumbnail
first_book['image_url']
# title
first_book['title']
# subtitle
first_book['subtitle']
# author (name of the first entry in the record's 'authors' list)
first_book['authors'][0]['author_name']
# publisher
first_book['publisher']
# publication year
first_book['publication_year']
# price
first_book['price']
# #### Step 7 - Put everything together - Loop through results and append data inside a list
# In[19]:
# One list per output column. Every list receives exactly one entry per book
# record, so the lists stay aligned row-for-row.
title = []
subtitle = []
author = []
publisher = []
publication_year = []
price = []
for result in results_json:
    # dict.get() is used throughout so that a record lacking a field
    # contributes None for that column instead of aborting the whole run.
    title.append(result.get('title'))
    subtitle.append(result.get('subtitle'))
    # Only the first listed author is kept; a record whose 'authors' entry is
    # missing or empty yields None rather than raising IndexError.
    authors = result.get('authors') or []
    author.append(authors[0].get('author_name') if authors else None)
    publisher.append(result.get('publisher'))
    publication_year.append(result.get('publication_year'))
    price.append(result.get('price'))
# double check
author, '----', title
# #### Step 8 - Pandas Dataframe
# In[21]:
# Pair each spreadsheet column header with the list collected for it; the
# order of the headers here is the column order of the resulting DataFrame.
column_names = ['Title', 'Subtitle', 'Author', 'Publisher', 'Publication Year', 'Price']
column_values = [title, subtitle, author, publisher, publication_year, price]
books_df = pd.DataFrame(dict(zip(column_names, column_values)))
# Inspect the assembled table.
books_df
# #### Step 9 - Store results in Excel
# index=False leaves the DataFrame's row index out of the written sheet.
books_df.to_excel('books.xlsx', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment