@MattyAyOh
Created December 21, 2016 19:03
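# Scrape Snagit reviews from G2 Crowd (Python 2): walk the paginated review
# listing, fetch each named reviewer's profile page, and write one CSV row
# per review.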
import csv
import requests
import re
try:
    from bs4 import BeautifulSoup
except ImportError:
    from BeautifulSoup import BeautifulSoup
g2URL = 'https://www.g2crowd.com/products/snagit/reviews?page='
g2UserURL = 'https://www.g2crowd.com/users/'

def getReviewPartForQuestion(fullReview, questionNumber):
    """Return the answer to one of the four G2 Crowd review questions."""
    questions = ['What do you like best?',
                 'What do you dislike?',
                 'Recommendations to others considering the product',
                 'What business problems are you solving? What benefits have you realized?']
    if questions[questionNumber] not in fullReview:
        return ''
    # Everything after this question's heading.
    remainder = fullReview.split(questions[questionNumber])[1]
    # Reviews may skip questions, so trim at the first later heading that
    # actually appears rather than assuming the very next one does.
    for nextQuestion in questions[questionNumber + 1:]:
        if nextQuestion in remainder:
            return remainder.split(nextQuestion)[0]
    return remainder
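# A quick sanity check of the splitter (hypothetical review text, not real
# G2 data):
#   sample = 'What do you like best? Fast capture. What do you dislike? The price.'
#   getReviewPartForQuestion(sample, 0)   # -> ' Fast capture. '
#   getReviewPartForQuestion(sample, 1)   # -> ' The price.'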

def writeReviewToCSV(response):
    """Parse one page of reviews and write a CSV row for each review."""
    if response.status_code != 200:
        raise Exception(response.status_code)
    parsed_html = BeautifulSoup(response.text, "html.parser")
    # One element per review on the page, except the user blocks, which
    # appear twice per review in this markup (hence the *2 index below).
    usernames = parsed_html.body.find_all('div', attrs={'class': 'hide-for-small-only'})
    reviewTitles = parsed_html.body.find_all('h3', attrs={'class': 'review-list-heading'})
    reviewStars = parsed_html.body.find_all('div', attrs={'class': re.compile("stars large stars")})
    reviewHelpfuls = parsed_html.body.find_all('div', attrs={'class': 'result-box-number'})
    reviewDates = parsed_html.body.find_all('time')
    reviewBodies = parsed_html.body.find_all('div', attrs={'itemprop': 'reviewBody'})
    # Iterate over every review found on the page (up to six per page;
    # the last page may have fewer, so don't hard-code the count).
    for reviewIndex in range(len(reviewTitles)):
        print "Review: " + str(reviewIndex)
        reviewTitle = reviewTitles[reviewIndex].text.encode('utf-8')
        # The star rating is encoded in the element's class names;
        # keep only the digits.
        reviewStar = ''.join(reviewStars[reviewIndex].get('class'))
        reviewStarInt = ''.join(x for x in reviewStar if x.isdigit())
        reviewHelpful = reviewHelpfuls[reviewIndex].text.encode('utf-8')
        reviewDate = reviewDates[reviewIndex].text.encode('utf-8')
        reviewBody = reviewBodies[reviewIndex].text
        # Split the free-text body into its four question/answer sections.
        whatLike = getReviewPartForQuestion(reviewBody, 0).encode('utf-8')
        whatDislike = getReviewPartForQuestion(reviewBody, 1).encode('utf-8')
        recommendation = getReviewPartForQuestion(reviewBody, 2).encode('utf-8')
        problems = getReviewPartForQuestion(reviewBody, 3).encode('utf-8')
        # Defaults for anonymous reviewers.
        username = 'Anonymous'
        userTitle = 'N/A'
        userDate = 'N/A'
        userDescription = 'N/A'
        userinfo = usernames[reviewIndex * 2]
        userimg = userinfo.find('img')
        isAnonymous = 'Anonymous' in userimg['alt']
        if isAnonymous:
            userTitle = userinfo.find('div', attrs={'class': 'tiny-text'}).text.encode('utf-8')
        else:
            # Named reviewer: fetch their G2 profile page for extra details.
            userfull = userinfo.find('span', attrs={'data-ue-widget': 'safe-name'})
            username = userfull.text.encode('utf-8')
            userid = userfull['data-safe-name-id']
            userResponse = requests.get(g2UserURL + str(userid))
            if userResponse.status_code != 200:
                raise Exception(userResponse.status_code)
            profileHtml = BeautifulSoup(userResponse.text, "html.parser")
            # Each profile field is optional; fall back to 'N/A' per field.
            try:
                userDate = profileHtml.body.find('span', attrs={'class': 'member-date'}).text.encode('utf-8')
            except AttributeError:
                userDate = 'N/A'
            try:
                userTitle = profileHtml.body.find('h3', attrs={'class': 'profile-user-role'}).text.encode('utf-8')
            except AttributeError:
                userTitle = 'N/A'
            try:
                userDescription = profileHtml.body.find('p').text.encode('utf-8')
            except AttributeError:
                userDescription = 'N/A'
        csvout.writerow([username, userTitle, userDate, userDescription,
                         reviewTitle, reviewStarInt, reviewHelpful, reviewDate,
                         whatLike, whatDislike, recommendation, problems])

csvfile = 'g2-snagit-reviews2.csv'
csvout = csv.writer(open(csvfile, 'wb'))
csvout.writerow(('user', 'user title', 'user since', 'user description',
                 'review title', 'stars', 'helpful', 'date reviewed',
                 'what liked', 'what disliked', 'recommendations', 'problems solved'))
finalPageNumber = 63
# range() excludes its upper bound, so add 1 to include the final page.
for page in range(1, finalPageNumber + 1):
    print "Page: " + str(page)
    r = requests.get(g2URL + str(page))
    writeReviewToCSV(r)
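# To run (Python 2 only -- note the print statements and csv 'wb' mode):
#   pip install requests beautifulsoup4
#   python scrape_g2.py        (the script filename here is hypothetical)
# Output lands in g2-snagit-reviews2.csv in the working directory.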