Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save BlaayLock/ad8ee07bcd621bbedfa66909a4242967 to your computer and use it in GitHub Desktop.
Save BlaayLock/ad8ee07bcd621bbedfa66909a4242967 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
from lxml import html
#~ response = requests.get('https://www.yahoo.com/news/')
#~ parsed_body = html.fromstring(response.text)
# Sample product-specification markup parsed in place of a live HTTP response.
# Adjacent string literals are concatenated, so the value is one unbroken string.
responsetext = (
    '<ul>'
    '<li><span>Model No.:</span><em>CN-374181929</em></li>'
    '<li><span>Material:</span><em>ABS</em></li>'
    '<li><span>Product Size:</span><em>4.5cm</em></li>'
    '<li><span>Product Weight:</span><em>0.053kg</em></li>'
    '<li><span>Features:</span><em>Phthalates Free</em></li>'
    '<li></li>'
    '<li><span>Package Size:</span><em>13*7.5*4.9cm</em></li>'
    '<li><span>Package Weight:</span><em>0.072kg</em></li>'
    '<li><span>Package Type:</span><em>colorbox</em></li>'
    '<li></li>'
    '<li><span>QTY/CTN:</span><em>160PCS</em></li>'
    '<li><span>Case Size:</span><em>58.5*43*40cm</em></li>'
    '<li><span>N.W:</span><em>8.5kg</em></li>'
    '<li><span>G.W:</span><em>10.5kg</em></li>'
    '<li><span>Volume:</span><em>0.1CBM</em></li>'
    '</ul>'
)
parsed_body = html.fromstring(responsetext)

# Every <li> element in the fragment, including the empty <li></li> separators.
data = parsed_body.xpath(".//li")
print(len(data))

for datasub in data:
    print(datasub)
    # Iterate over the child elements of this <li> (the <span> label and the
    # <em> value). The empty separator items have no children, so nothing is
    # printed for them beyond the element itself.
    for node in datasub:
        print('text =', [node.text])  # e.g. text = ['Model No.:']
        print(node.values())  # attribute values; the sample tags carry no attributes
        print(node.keys())  # attribute names; likewise empty for the sample
        print(node.tag)  # 'span' or 'em'
        # .get() returns None when the attribute is absent, which is the case
        # for every tag in the sample markup.
        print(node.get("data-current"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment