Created
February 3, 2011 04:09
-
-
Save dmasad/809038 to your computer and use it in GitHub Desktop.
DeviantArt Scraper v0.1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DeviantArt Friend Scraper
# Uses BeautifulSoup to scrape Friends pages recursively and writes edges to a CSV file.
from urllib2 import urlopen, URLError | |
from BeautifulSoup import BeautifulSoup | |
import csv | |
def get_friends(username):
    """Return a list of the named user's friends.

    Fetches ``http://<username>.deviantart.com/friends/`` and collects the
    rendered contents of every ``<a class="u">`` element found on that page.
    Only the first friends page is read.

    username -- name that is spliced into the host part of the URL.

    Returns a list with one entry per matching anchor. If the page cannot
    be opened, the URLError is printed and an empty list is returned.
    """
    url = "http://" + username + ".deviantart.com/friends/"
    try:
        page = urlopen(url)
    except URLError as e:
        print("URLError: " + str(e))
        return []

    # Always release the HTTP response, even if parsing raises.
    try:
        page_soup = BeautifulSoup(page)  # Soup the page.
    finally:
        page.close()

    # TODO: Add functionality for more than one friends page.
    return [
        link.renderContents()  # The user name.
        for link in page_soup.findAll("a", {"class": "u"})
    ]
def build_egonet(seed_user, writer, history, depth):
    """Recursively write the ego network around seed_user as CSV edges.

    seed_user -- user whose friends are fetched with get_friends().
    writer    -- open CSV writer; receives one [seed_user, friend] row per
                 newly seen friend.
    history   -- list of friend names that have already been written; it is
                 extended in place so a name is only written once.
    depth     -- remaining levels to expand; nothing happens unless it is
                 greater than zero.

    NOTE(review): the nesting below was reconstructed from flattened source;
    confirm that the history update belongs inside the "not seen yet" branch.
    """
    if not depth > 0:
        return

    print("Builing egonet for " + seed_user)
    neighbours = get_friends(seed_user)

    # First pass: emit an edge for every friend not written before.
    for name in neighbours:
        if name in history:
            continue
        writer.writerow([seed_user, name])
        history.append(name)

    # Second pass: expand every friend (including ones already in history)
    # one level shallower.
    remaining = depth - 1
    for name in neighbours:
        build_egonet(name, writer, history, remaining)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment