Last active
June 27, 2016 11:48
-
-
Save dmn001/6390139c037949f7ea4b to your computer and use it in GitHub Desktop.
parse sold item date, name and price
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Collect date, name and price of sold items from saved market-history pages.
#
# Reads 001.json .. 101.json from the current directory, parses the HTML
# stored under each file's 'results_html' key, and writes one
# "date,name,price" line to all_output.txt for every listing row that
# contains the text "Buyer:".
import json
import re

from bs4 import BeautifulSoup

# Compiled once instead of once per listing row.
buyer_pattern = re.compile("Buyer:")

# The context manager guarantees the output file is flushed and closed even
# when an input file is missing or a row lacks one of the expected elements.
with open("all_output.txt", "wb") as of:
    for file_num in range(1, 102):
        filename = "%03d.json" % file_num
        # Parenthesised single-argument form works on both Python 2 and 3.
        print(filename)

        # Binary mode: json.load decodes the bytes itself, so the result does
        # not depend on the platform's default text encoding.
        with open(filename, "rb") as data_file:
            data = json.load(data_file)

        html = data['results_html']
        doc = BeautifulSoup(html, "lxml")
        rows = doc.find_all(
            'div', class_="market_listing_row market_recent_listing_row")

        for item in rows:
            # Only rows mentioning a buyer are written out.
            if not item.find(text=buyer_pattern):
                continue

            name = item.find(
                'span', class_="market_listing_item_name").text.strip()
            # The output is comma-separated, so commas inside the item name
            # are replaced to keep three fields per line.
            name = name.replace(',', '_')
            date = item.find(
                'div',
                class_="market_listing_right_cell market_listing_listed_date",
            ).text.strip()
            sold_price = item.find(
                'span', class_="market_listing_price").text.strip()

            line = "%s,%s,%s\n" % (date, name, sold_price)
            of.write(line.encode('utf-8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment