Skip to content

Instantly share code, notes, and snippets.

@AndrewPardoe
Last active January 10, 2022 03:48
Show Gist options
  • Save AndrewPardoe/8701800c118e1945a4ea63a65f7f7acb to your computer and use it in GitHub Desktop.
Scrape flat list of UserVoice Ideas with links from a category's UserVoice pages
# UserVoice doesn't have a search capability that will filter on category. I only care about my category (C++) in a huge
# Visual Studio database. This script scrapes all UserVoice suggestions in my category with links into an HTML document.
# Improvements welcome from those who actually know Python--this is the first Python script I've ever needed to write.
import re
import requests
import urllib.request
from bs4 import BeautifulSoup
# Best-effort writer: if the file's encoding rejects the whole string, fall
# back to emitting it one character at a time, substituting '?' for anything
# unencodable. Lossy on purpose — keeps the scrape from crashing mid-run.
def safewrite(file, string):
    """Write *string* to *file*; characters the file cannot encode become '?'."""
    try:
        file.write(string)
        return
    except UnicodeEncodeError:
        pass
    for ch in string:
        try:
            file.write(ch)
        except UnicodeEncodeError:
            file.write("?")
# Specifics of my UserVoice page and category
prefix = 'https://visualstudio.uservoice.com'
# Landing page of the C++ category; scraped once below to discover pagination.
firstPage = prefix + '/forums/121579-visual-studio-ide/category/30937-languages-c'
# Per-page listing URL template; {} is filled with a 1-based page number.
nextPage = prefix + '/forums/121579-visual-studio-ide/category/30937-languages-c/filters/top?page={}'
# Ideas are split across many pages. Find all page numbers from the Pagination control.
soup = BeautifulSoup(requests.get(firstPage).content, "lxml")
# NOTE(review): assumes the landing page always contains a div.uvPagination
# element; if the site markup changes, pagination is None and the find_all
# call below will raise AttributeError — confirm before relying on this.
pagination = soup.find("div", attrs={'class':'uvPagination'})
def pageeq(href):
    """Link filter for BeautifulSoup's find_all(href=...).

    Returns True for hrefs that carry a 'page=' query parameter (pagination
    links), False for other hrefs and for None (tags without an href).
    A plain substring test replaces the original per-call re.compile —
    the pattern had no regex metacharacters.
    """
    return bool(href and "page=" in href)
# Raw string: '\d' in a plain literal is an invalid escape (SyntaxWarning on
# modern Python).
redigits = re.compile(r'\d+')

# Determine how many listing pages exist by taking the HIGHEST page number
# found among the pagination links. The original kept whichever numeric link
# happened to come last, which silently assumed the links were ordered;
# max() makes the intent explicit and robust to link ordering.
last_page = 0
for pageref in pagination.find_all(href=pageeq):
    digits = redigits.findall(pageref.string or "")
    if digits:
        last_page = max(last_page, int(digits[0]))
upper = last_page + 1  # exclusive bound for range(1, upper) below
# Create a local HTML page with a list of my category's UserVoice links.
# Open as UTF-8 explicitly: relying on the platform default encoding (e.g.
# cp1252 on Windows) is what made UnicodeEncodeError workarounds necessary,
# and the charset is declared in the HTML head so browsers decode it right.
outfile = open('UserVoice.html', 'w', encoding='utf-8')
outfile.write("<html>\n<head>\n<meta charset=\"utf-8\">\n<title>UserVoice items from {0}</title>\n</head>\n".format(firstPage))
outfile.write("<body>\n<h2>UserVoice items from {0}</h2>\n<ul>\n".format(firstPage))
# Run through every page, find the idea links, and emit one list item per
# idea: absolute link (prefix + relative href) plus the idea's title text.
for page in range(1, upper):
    print("Processing page {0} of {1}".format(page, upper - 1))
    soup = BeautifulSoup(requests.get(nextPage.format(page)).content, "lxml")
    for header in soup.find_all("h2", class_="uvIdeaTitle"):
        outfile.write("\t<li><a href=\"{0}{1}\">".format(prefix, header.a.get('href')))
        # Title text may contain characters the file can't encode; safewrite
        # degrades them to '?' rather than crashing.
        safewrite(outfile, header.a.string)
        outfile.write("</a></li>\n")  # fixed: original emitted malformed "</a/>"
# Close out the HTML page: terminate the list/body/document, then close the
# file so buffered output is flushed to disk.
outfile.write("</ul>\n</body>\n</html>\n")
outfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment