Last active
February 28, 2017 11:40
-
-
Save balkian/5a26850fd18725fc17d698fd6d09f1ba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from selenium import webdriver | |
from selenium.webdriver.support.ui import Select | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
#import selenium.webdriver.firefox.webdriver as fwb | |
import selenium.webdriver.chrome as cwd | |
from selenium.webdriver.support import expected_conditions as EC | |
import time | |
import os | |
import csv | |
from urllib import request | |
from urllib.error import HTTPError | |
# Landing page the scraper starts from.
home = 'https://www.aliexpress.com/'

# Field names of one scraped row; also used as the CSV column order.
ATTRS = ['time', 'id', 'image', 'store', 'status', 'title', 'price', 'quantity']

# Firefox alternative, kept for reference:
# ff_bin = fwb.FirefoxBinary(firefox_path='/usr/bin/firefox')
# ff_profile = fwb.FirefoxProfile()

# Chromium configuration: English UI and a fixed profile directory
# (presumably so that a previous session can be reused -- confirm).
options = cwd.options.Options()
options.binary_location = '/usr/bin/chromium'
options.add_argument('--lang=en')
options.add_argument('--user-data-dir=/tmp/aliexpress')

# Single browser instance shared by every function in this script.
driver = webdriver.Chrome(chrome_options=options)
# driver.set_window_size(1024, 768)
def login():
    """Open AliExpress and wait until an account is signed in.

    Loads the home page, follows the "Go to Global Site (English)" link when
    it can be found and clicked, clicks "Sign in", and then waits up to 30
    seconds for an element with class ``account-name`` to be present.

    Raises:
        SystemExit: when the ``account-name`` element does not appear; the
            error is printed and the browser is closed before exiting with
            status 1.
    """
    # Imported here so the function does not rely on the file-level imports
    # being extended.
    from selenium.common.exceptions import WebDriverException

    driver.get(home)
    try:
        driver.find_element_by_link_text('Go to Global Site (English)').click()
    except WebDriverException:
        # Best effort: a missing or unclickable link is taken to mean the
        # site is already shown in English. Only Selenium errors are
        # swallowed, so Ctrl-C still interrupts the script.
        print('Already in English')

    sign_in = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.LINK_TEXT, 'Sign in')))
    sign_in.click()

    try:
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'account-name')))
    except Exception as ex:
        print('My orders not found')
        print(ex)
        driver.quit()
        # Non-zero status so callers/shells can tell the login failed.
        raise SystemExit(1)
def gotoorders():
    """Open the account menu and follow its "My Orders" link."""
    driver.find_element_by_class_name('account-name').click()

    orders_link = (By.LINK_TEXT, 'My Orders')
    waiter = WebDriverWait(driver, 10)
    waiter.until(EC.element_to_be_clickable(orders_link)).click()
    # Block until a "My Orders" link is clickable once more after the click.
    waiter.until(EC.element_to_be_clickable(orders_link))
def _order_field(order, label):
    """Return the text of the span that follows the span containing *label*."""
    xpath = ".//span[text()[contains(.,'{}')]]/following-sibling::span[1]".format(label)
    return order.find_element_by_xpath(xpath).text


def _download_image(url, filename):
    """Save *url* as *filename* unless that file already exists (best effort)."""
    # Imported here so the helper does not rely on the file-level imports
    # being extended. HTTPError is a subclass of URLError.
    from urllib.error import URLError

    if os.path.exists(filename):
        return
    try:
        request.urlretrieve(url, filename)
    except URLError:
        # A failed thumbnail download (HTTP error status, unreachable host,
        # truncated transfer) must not abort the whole scrape.
        print('Couldn\'t downloaded image:', url)


def scrape_orders_page():
    """Yield one row per product shown on the current orders page.

    Walks every ``tbody`` of the ``buyer-ordertable`` element and, for each
    ``product-sets`` element inside it, downloads the product image into the
    current directory (skipped when the file already exists) and yields a
    dict with the keys ``time``, ``id``, ``image``, ``store``, ``status``,
    ``title``, ``price`` and ``quantity``. All values are strings.
    """
    ordertable = driver.find_element_by_id('buyer-ordertable')
    for order in ordertable.find_elements_by_tag_name('tbody'):
        # NOTE(review): title and status are looked up once per order (first
        # match), so every product row of a multi-item order carries the same
        # title -- confirm this is intended.
        title = order.find_element_by_class_name('product-title').text
        status = order.find_element_by_class_name('order-status').text
        order_time = _order_field(order, 'Order time')
        orderid = _order_field(order, 'Order ID')
        store = _order_field(order, 'Store name')

        for product in order.find_elements_by_class_name('product-sets'):
            amount_spans = (product.find_element_by_class_name('product-amount')
                            .find_elements_by_tag_name('span'))
            # The first two spans hold the price and the quantity, in that order.
            price, quantity = (span.text for span in amount_spans[:2])

            src = product.find_element_by_xpath(
                ".//div[@class='product-left']/a/img").get_attribute('src')
            # Drop everything after the last '_' of the thumbnail URL.
            image = src.rsplit('_', 1)[0]
            _download_image(image, image.split('/')[-1])

            yield {'time': order_time,
                   'id': orderid,
                   'image': image,
                   'store': store,
                   'status': status,
                   'title': title,
                   'price': price,
                   'quantity': quantity}
def orders():
    """Yield the driver once for every orders page, first page included.

    Navigates to the orders page first; each yielded value is the shared
    ``driver`` positioned on the page to scrape next.
    """
    gotoorders()
    yield driver
    for page in next_orders_page():
        yield page
def next_orders_page():
    """Click through the following orders pages, yielding the driver on each.

    On every step the "Next" link is clicked and the function waits up to 5
    seconds for the ``buyer-ordertable`` element to be present before
    yielding ``driver``. The generator finishes when no "Next" link can be
    found, which is treated as having reached the last page.
    """
    # Imported here so the function does not rely on the file-level imports
    # being extended.
    from selenium.common.exceptions import NoSuchElementException

    # A loop instead of self-recursion, so the number of pages is not
    # limited by the interpreter's recursion depth.
    while True:
        try:
            next_link = driver.find_element_by_link_text('Next')
        except NoSuchElementException:
            return
        next_link.click()
        WebDriverWait(driver, 5).until(
            EC.presence_of_element_located((By.ID, 'buyer-ordertable')))
        yield driver
# Script entry: sign in, then dump every product of every orders page
# into list.csv.
login()

from itertools import islice

try:
    # newline='' is what the csv module requires of files it writes to
    # (avoids blank lines between rows on Windows); UTF-8 keeps non-ASCII
    # titles and store names writable regardless of the locale.
    with open('list.csv', 'w', newline='', encoding='utf-8') as f:
        fw = csv.DictWriter(f, ATTRS)
        fw.writeheader()
        # islice(..., None) applies no limit; replace None with a number to
        # cap how many pages are scraped.
        for _ in islice(orders(), None):
            fw.writerows(scrape_orders_page())
    # driver.save_screenshot('screen.png')
finally:
    # Always close the browser, even when scraping fails part-way.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment