Skip to content

Instantly share code, notes, and snippets.

@pankaj28843
Created October 6, 2014 15:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pankaj28843/30840626fa7cea1155d4 to your computer and use it in GitHub Desktop.
Save pankaj28843/30840626fa7cea1155d4 to your computer and use it in GitHub Desktop.
import re
import requests
from lxml import etree
# Matches a decimal number with an optional integer part (".5", "12.50") or,
# failing that, a plain run of digits ("1299").
REGEX_FIND_NUMBER = re.compile(r'\d*\.\d+|\d+')
# Matches a comma together with any whitespace on either side of it.
REGEX_FIND_COMMA = re.compile(r'\s*,\s*')


def get_price_from_text(text):
    """Return the first number that appears in ``text`` as a float.

    Commas (and the whitespace around them) are removed before searching,
    so a thousands-separated value such as "1,299.00" is read as 1299.0.

    Args:
        text: String that contains a price somewhere in it.

    Returns:
        The first numeric value found, converted to ``float``.

    Raises:
        IndexError: If ``text`` contains no digits at all.
    """
    cleaned = REGEX_FIND_COMMA.sub('', text.strip())
    # Only the first number is needed, so stop at the first match instead of
    # collecting every number in the string.
    match = REGEX_FIND_NUMBER.search(cleaned)
    if match is None:
        # Same exception type as indexing an empty result list would raise,
        # but with a message that says what actually went wrong.
        raise IndexError('no numeric value found in {!r}'.format(text))
    return float(match.group())
def get_text_for_etree_node(node):
    """Return the text held by ``node`` and its descendants, in document order.

    The result is ``node.text``, followed by the recursively collected text
    of every child, followed by ``node.tail``. Note that the tail of
    ``node`` itself (the text that follows its closing tag) is part of the
    result.

    The content of special child nodes such as comments and processing
    instructions is not visible page text, so it is left out; the tail text
    that follows such a node is still kept.

    Args:
        node: An ElementTree-style element (has ``tag``, ``text``, ``tail``
            and iterates over its children).

    Returns:
        The concatenated text as a string; empty if there is no text.
    """
    # Real elements carry a string tag; comments and processing instructions
    # expose their factory function as ``tag`` instead, hence ``callable``.
    if callable(node.tag):
        own_text = ''
    else:
        own_text = node.text or ''
    children_text = ''.join(get_text_for_etree_node(child) for child in node)
    return own_text + children_text + (node.tail or '')
def get_price_from_amazon(url, timeout=10):
    """Fetch a product page and return the price shown on it.

    The page is downloaded, parsed as HTML, and the text of the
    ``<span id="priceblock_ourprice">`` element is converted to a number.

    Args:
        url: Address of the product page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block forever on an unresponsive host.

    Returns:
        The price as a ``float``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page has no price element.
        IndexError: If the price element contains no number.
    """
    response = requests.get(url, timeout=timeout)
    # Stop here on 4xx/5xx answers instead of trying to scrape an error page.
    response.raise_for_status()

    root = etree.HTML(response.text)
    # etree.HTML may yield None for an empty document, so guard before find().
    target_element = None
    if root is not None:
        target_element = root.find(".//span[@id='priceblock_ourprice']")
    if target_element is None:
        raise ValueError(
            "no span with id 'priceblock_ourprice' found at {}".format(url))

    target_element_text = get_text_for_etree_node(target_element)
    return get_price_from_text(target_element_text)
if __name__ == "__main__":
    # Example product page used to exercise the scraper from the command line.
    url = "http://www.amazon.in/gp/product/B00MMKAVR8/"
    price = get_price_from_amazon(url)
    # A parenthesised single argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("Price is : {}\n".format(price))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment