# chrisguitarguy/get-links.py

## get-links.py
from BeautifulSoup import BeautifulSoup as Soup
from urllib2 import urlopen

def get_product_links(url):
	"""Fetch ``url`` and collect the product links found on the page.

	A product link is the ``href`` of the first ``<a>`` inside each
	``<h2 class="product-name">`` element.

	Returns a list of href values, in the order the headings were found
	(possibly empty), or ``False`` when the page could not be downloaded.
	"""
	try:
		response = urlopen(url)
		try:
			content = response.read()
		finally:
			# Release the connection even if the read fails part-way.
			response.close()
	except Exception:
		# Best-effort fetch: any download failure is reported as False.
		# Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
		# still propagate to the caller.
		return False

	soup = Soup(content)
	links = []
	for heading in soup.findAll('h2', {'class': 'product-name'}):
		try:
			href = heading.find('a')['href']
		except (TypeError, KeyError):
			# TypeError: the heading has no <a> (find() returned None).
			# KeyError: the <a> has no href attribute.
			continue
		links.append(href)
	return links


if __name__ == '__main__':
	# Command-line entry point: get-links.py URL output_file
	# Writes every product link found at URL to output_file, one per line.
	from sys import argv, exit
	if len(argv) < 3:
		print('usage: %s URL output_file' % argv[0])
		exit(1)

	links = get_product_links(argv[1])
	if links is False:
		# get_product_links signals a failed download with False; stop
		# before the output file is opened (and therefore truncated).
		print('error: could not fetch %s' % argv[1])
		exit(1)

	try:
		out = open(argv[2], 'w')
	except IOError:
		# open() raises on failure; it never returns a falsy handle.
		print("error!")
		exit(1)

	# The with-block closes the file even if a write fails.
	with out:
		for link in links:
			out.write(link + '\n')
	exit()
	from BeautifulSoup import BeautifulSoup as Soup
	from urllib2 import urlopen

	def get_product_links(url):
		"""Fetch ``url`` and collect the product links found on the page.

		A product link is the ``href`` of the first ``<a>`` inside each
		``<h2 class="product-name">`` element.

		Returns a list of href values, in the order the headings were
		found (possibly empty), or ``False`` when the page could not be
		downloaded.
		"""
		# NOTE(review): this looks like a second copy of the function
		# defined earlier in the file -- confirm whether both are needed.
		try:
			response = urlopen(url)
			try:
				content = response.read()
			finally:
				# Release the connection even if the read fails part-way.
				response.close()
		except Exception:
			# Best-effort fetch: any download failure is reported as
			# False. Unlike a bare ``except:``, KeyboardInterrupt and
			# SystemExit still propagate to the caller.
			return False

		soup = Soup(content)
		links = []
		for heading in soup.findAll('h2', {'class': 'product-name'}):
			try:
				href = heading.find('a')['href']
			except (TypeError, KeyError):
				# TypeError: the heading has no <a> (find() returned None).
				# KeyError: the <a> has no href attribute.
				continue
			links.append(href)
		return links


	if __name__ == '__main__':
		# Command-line entry point: get-links.py URL output_file
		# Writes every product link found at URL to output_file, one per
		# line.
		# NOTE(review): this looks like a second copy of the script body
		# that appears earlier in the file -- confirm whether both are
		# needed.
		from sys import argv, exit
		if len(argv) < 3:
			print('usage: %s URL output_file' % argv[0])
			exit(1)

		links = get_product_links(argv[1])
		if links is False:
			# get_product_links signals a failed download with False; stop
			# before the output file is opened (and therefore truncated).
			print('error: could not fetch %s' % argv[1])
			exit(1)

		try:
			out = open(argv[2], 'w')
		except IOError:
			# open() raises on failure; it never returns a falsy handle.
			print("error!")
			exit(1)

		# The with-block closes the file even if a write fails.
		with out:
			for link in links:
				out.write(link + '\n')
		exit()