# adammichaelwood/amazon.py

## amazon.py
import http.cookiejar
import random
import subprocess
import sys
import time
import urllib.parse
import urllib.request
from operator import itemgetter

import blessings
import google
from bs4 import BeautifulSoup
from selenium import webdriver

# Start the Firefox WebDriver session that every later step drives.
browser = webdriver.Firefox()
# import Resource

# Percent-encode the topic for use as a query-string value.  A bare
# space-to-"+" swap left characters such as "#", "&" and a literal "+"
# (e.g. "C#", "R&D", "C++") unescaped, which corrupted the search URL.
# For plain words separated by spaces the result is unchanged.
topic = urllib.parse.quote_plus(input("Topic: "))

# Terminal helper; used later to colour the finished entry.
t = blessings.Terminal()

amz_base = "https://www.amazon.com/s/?url=search-alias%3Dstripbooks&field-keywords="

# Load the search results for the requested topic.
browser.get(amz_base + topic)

# An empty answer (just Enter) keeps the paperback default and clicks the
# "Paperback" link; any non-empty answer skips the click.
change_default_format = input("Default format is paperback. Change? ")
if not change_default_format:
    browser.find_element_by_partial_link_text("Paperback").click()

# Re-prompt on non-numeric input instead of dying with ValueError after the
# browser has already been opened.
while True:
    try:
        pages_to_search = int(input("Pages to search? (About 10-12 books per page) "))
    except ValueError:
        print("Please enter a whole number.")
    else:
        break

# Book URLs gathered from the result pages; a set drops duplicates.
amz_links = set()


# Walk up to `pages_to_search` result pages, collecting one URL per listed
# title into `amz_links`.
for _ in range(pages_to_search):

    titles = browser.find_elements_by_class_name("s-access-detail-page")

    for title in titles:
        href = title.get_attribute('href')
        if not href:
            # No usable link on this element; skip it rather than crash
            # on None.split().
            continue
        # Keep only the part before '/ref' so the same book reached via
        # different links collapses to a single set entry.
        title_url = href.split('/ref', 1)[0]
        print(title_url)
        amz_links.add(title_url)

    # Pause, then scroll to the bottom of the page before looking for the
    # pagination link (presumably so the link is loaded and in view --
    # TODO confirm).
    time.sleep(5)
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    try:
        browser.find_element_by_partial_link_text('Next Page').click()
    except Exception:
        # Best effort: any failure to find/click "Next Page" ends paging.
        # Catching Exception (not a bare except) lets Ctrl-C and
        # SystemExit propagate instead of being treated as "last page".
        break


print("Total books links: " + str(len(amz_links)))

# Entries accepted by the user, in the order they were reviewed.
books = []

time.sleep(5)

total_links = len(amz_links)

# Visit every collected book page and interactively build a Markdown entry.
for count, book_url in enumerate(amz_links, start=1):

    print(str(count) + " of " + str(total_links))

    book = dict()
    print(book_url)
    browser.get(book_url)

    # An empty answer (just Enter) skips this book.
    use = input("Use book? ")
    if not use:
        continue

    book['url'] = book_url
    title = browser.find_element_by_id('productTitle').text
    print("TITLE: " + title)

    # Optionally cut the title at the first occurrence of the typed text
    # (e.g. to drop a subtitle); an empty answer keeps the full title.
    truncate_at = input("TRUNCATE: ")
    if not truncate_at:
        book['title'] = title
    else:
        book['title'] = title.split(truncate_at, 1)[0]
    print("TITLE TO USE: " + book['title'])

    # Keep only contributors labelled "(Author)", with the label removed.
    author_names = []
    for author in browser.find_elements_by_class_name('author'):
        name = author.text
        if "(Author)" not in name:
            continue
        author_names.append(name.split(' (Au', 1)[0])

    num_of_authors = len(author_names)

    # A single author keeps the full name; with several authors each is
    # reduced to a last name, which the user may override.
    if num_of_authors != 1:
        for i, full_name in enumerate(author_names):
            last_name = full_name.rsplit(' ', 1)[-1]
            print("Original: " + full_name + "\n"
                "Last Name: " + last_name
            )
            fix_last_name = input("Fix last name: ")
            author_names[i] = fix_last_name or last_name

    if num_of_authors == 0:
        # No "(Author)" contributor was found; ask instead of emitting an
        # entry that reads "by , ...".
        author_string = input("No authors found. Author: ")
    elif num_of_authors == 1:
        author_string = author_names[0]
    elif num_of_authors == 2:
        author_string = author_names[0] + " and " + author_names[1]
    elif num_of_authors == 3:
        author_string = author_names[0] + ", " + author_names[1] + ", and " + author_names[2]
    else:
        author_string = author_names[0] + " et al."

    print(author_string)

    book['author'] = author_string

    # Scroll the "Product Details" section into view before asking for the
    # publication year.  This is only a convenience, so a page without that
    # text must not abort the session and discard the entries typed so far.
    try:
        details_element = browser.find_element_by_xpath("//*[contains(text(), 'Product Details')]")
        browser.execute_script("return arguments[0].scrollIntoView();", details_element)
    except Exception:
        print("Could not scroll to Product Details; look up the year manually.")

    book['year'] = input("Publication year: ")

    book['description'] = input("Description: ")

    # Markdown list item built from the title, url, year, author and
    # description keys collected above.
    book_string = " - [_{title}_]({url}) ({year}), by {author}, {description}\n".format(**book)

    print(t.yellow(book_string))

    book['string'] = book_string

    books.append(book)


# Print every accepted entry as a Markdown list item.
for book in books:
    print(book['string'])

# Release the WebDriver session; without this the Firefox window and its
# driver process are left running after the script finishes.
browser.quit()
	import sys
	import google
	import time
	import random
	from bs4 import BeautifulSoup
	import urllib.request
	import http.cookiejar
	from selenium import webdriver
	import subprocess
	import blessings
	from operator import itemgetter

	# Start the Firefox WebDriver session that every later step drives.
	browser = webdriver.Firefox()
	# import Resource

	# Percent-encode the topic for use as a query-string value.  A bare
	# space-to-"+" swap left characters such as "#", "&" and a literal "+"
	# (e.g. "C#", "R&D", "C++") unescaped, which corrupted the search URL.
	# For plain words separated by spaces the result is unchanged.
	topic = urllib.parse.quote_plus(input("Topic: "))

	# Terminal helper; used later to colour the finished entry.
	t = blessings.Terminal()

	amz_base = "https://www.amazon.com/s/?url=search-alias%3Dstripbooks&field-keywords="

	# Load the search results for the requested topic.
	browser.get(amz_base + topic)

	# An empty answer (just Enter) keeps the paperback default and clicks the
	# "Paperback" link; any non-empty answer skips the click.
	# (Nesting restored: the click belongs to the `if`.)
	change_default_format = input("Default format is paperback. Change? ")
	if not change_default_format:
		browser.find_element_by_partial_link_text("Paperback").click()

	# Re-prompt on non-numeric input instead of dying with ValueError after
	# the browser has already been opened.
	while True:
		try:
			pages_to_search = int(input("Pages to search? (About 10-12 books per page) "))
		except ValueError:
			print("Please enter a whole number.")
		else:
			break

	# Book URLs gathered from the result pages; a set drops duplicates.
	amz_links = set()


	# Walk up to `pages_to_search` result pages, collecting one URL per
	# listed title into `amz_links`.  (Loop nesting restored; it had been
	# flattened to a single indent level.)
	for _ in range(pages_to_search):

		titles = browser.find_elements_by_class_name("s-access-detail-page")

		for title in titles:
			href = title.get_attribute('href')
			if not href:
				# No usable link on this element; skip it rather than
				# crash on None.split().
				continue
			# Keep only the part before '/ref' so the same book reached
			# via different links collapses to a single set entry.
			title_url = href.split('/ref', 1)[0]
			print(title_url)
			amz_links.add(title_url)

		# Pause, then scroll to the bottom of the page before looking for
		# the pagination link (presumably so the link is loaded and in
		# view -- TODO confirm).
		time.sleep(5)
		browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

		try:
			browser.find_element_by_partial_link_text('Next Page').click()
		except Exception:
			# Best effort: any failure to find/click "Next Page" ends
			# paging.  Catching Exception (not a bare except) lets Ctrl-C
			# and SystemExit propagate.
			break


	print("Total books links: " + str(len(amz_links)))

	# Entries accepted by the user, in the order they were reviewed.
	books = []

	time.sleep(5)

	total_links = len(amz_links)

	# Visit every collected book page and interactively build a Markdown
	# entry.  (Loop/branch nesting restored; it had been flattened to a
	# single indent level.)
	for count, book_url in enumerate(amz_links, start=1):

		print(str(count) + " of " + str(total_links))

		book = dict()
		print(book_url)
		browser.get(book_url)

		# An empty answer (just Enter) skips this book.
		use = input("Use book? ")
		if not use:
			continue

		book['url'] = book_url
		title = browser.find_element_by_id('productTitle').text
		print("TITLE: " + title)

		# Optionally cut the title at the first occurrence of the typed
		# text; an empty answer keeps the full title.
		truncate_at = input("TRUNCATE: ")
		if not truncate_at:
			book['title'] = title
		else:
			book['title'] = title.split(truncate_at, 1)[0]
		print("TITLE TO USE: " + book['title'])

		# Keep only contributors labelled "(Author)", with the label removed.
		author_names = []
		for author in browser.find_elements_by_class_name('author'):
			name = author.text
			if "(Author)" not in name:
				continue
			author_names.append(name.split(' (Au', 1)[0])

		num_of_authors = len(author_names)

		# A single author keeps the full name; with several authors each
		# is reduced to a last name, which the user may override.
		if num_of_authors != 1:
			for i, full_name in enumerate(author_names):
				last_name = full_name.rsplit(' ', 1)[-1]
				print("Original: " + full_name + "\n"
					"Last Name: " + last_name
				)
				fix_last_name = input("Fix last name: ")
				author_names[i] = fix_last_name or last_name

		if num_of_authors == 0:
			# No "(Author)" contributor was found; ask instead of emitting
			# an entry that reads "by , ...".
			author_string = input("No authors found. Author: ")
		elif num_of_authors == 1:
			author_string = author_names[0]
		elif num_of_authors == 2:
			author_string = author_names[0] + " and " + author_names[1]
		elif num_of_authors == 3:
			author_string = author_names[0] + ", " + author_names[1] + ", and " + author_names[2]
		else:
			author_string = author_names[0] + " et al."

		print(author_string)

		book['author'] = author_string

		# Scroll the "Product Details" section into view before asking for
		# the publication year.  This is only a convenience, so a page
		# without that text must not abort the session and discard the
		# entries typed so far.
		try:
			details_element = browser.find_element_by_xpath("//*[contains(text(), 'Product Details')]")
			browser.execute_script("return arguments[0].scrollIntoView();", details_element)
		except Exception:
			print("Could not scroll to Product Details; look up the year manually.")

		book['year'] = input("Publication year: ")

		book['description'] = input("Description: ")

		# Markdown list item built from the title, url, year, author and
		# description keys collected above.
		book_string = " - [_{title}_]({url}) ({year}), by {author}, {description}\n".format(**book)

		print(t.yellow(book_string))

		book['string'] = book_string

		books.append(book)


	# Print every accepted entry as a Markdown list item.  (The print is
	# the loop body; its nesting had been flattened.)
	for book in books:
		print(book['string'])

	# Release the WebDriver session; without this the Firefox window and
	# its driver process are left running after the script finishes.
	browser.quit()