@dmasad
Created February 3, 2011 04:09
DeviantArt Scraper v0.1
# DeviantArt Friend Scraper
# Uses BeautifulSoup to scrape Friends pages recursively and writes edges to a CSV file.

from urllib2 import urlopen, URLError
from BeautifulSoup import BeautifulSoup
import csv


def get_friends(username):
    # Return a list of the named user's friends.
    friends_list = []  # The list of the friends.
    url = "http://" + username + ".deviantart.com/friends/"
    try:
        page = urlopen(url)
    except URLError, e:
        print "URLError: " + str(e)
        return []
    page_soup = BeautifulSoup(page)  # Soup the page.
    friends = page_soup.findAll("a", {"class": "u"})
    for f in friends:
        friends_list.append(f.renderContents())  # Append the user name.
    # TODO: Add functionality for more than one friends page.
    return friends_list
def build_egonet(seed_user, writer, history, depth):
    # Starting with a seed user, build an ego network and export it to CSV.
    # Requires an open CSV writer.
    # history is a list of already-scraped users, to keep them from being written twice.
    if depth > 0:
        print "Building egonet for " + seed_user
        friends = get_friends(seed_user)
        for friend in friends:
            if friend not in history:  # Check whether we've written this user yet.
                writer.writerow([seed_user, friend])
                history.append(friend)
        for friend in friends:
            build_egonet(friend, writer, history, depth - 1)
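
A minimal usage sketch for the two functions above. The seed username "exampleuser", the output filename, and the depth of 2 are placeholders (not part of the original gist); it opens a CSV file, writes an optional header row, and crawls two hops out from the seed.

# Usage sketch: crawl two hops out from a hypothetical seed user and write edges to CSV.
# "exampleuser" and "deviantart_edges.csv" are placeholders, not from the original gist.
if __name__ == "__main__":
    with open("deviantart_edges.csv", "wb") as f:  # "wb" mode for the Python 2 csv module
        writer = csv.writer(f)
        writer.writerow(["source", "target"])       # optional header row
        build_egonet("exampleuser", writer, [], 2)  # depth=2: seed's friends and their friends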