Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@tommyhuang1
Created February 12, 2017 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tommyhuang1/4e99de23e7d82fd6b8f8f56563becfa9 to your computer and use it in GitHub Desktop.
Save tommyhuang1/4e99de23e7d82fd6b8f8f56563becfa9 to your computer and use it in GitHub Desktop.
Web Scraping Code
import requests
from bs4 import BeautifulSoup as bf
def ChangingURL(n):
    """Return the drugs.com gabapentin review-listing URL for page *n*.

    The listing is requested sorted by rating, highest first; *n* is
    converted with ``str`` and appended as the ``page`` query parameter.
    """
    base_url = 'https://www.drugs.com/comments/gabapentin/?sort=rating&order=desc&sort_reviews=highest_rating&page='
    return base_url + str(n)
def get_review(soup):
    """Extract the user reviews found in a parsed review-listing page.

    Every ``<div class="block-wrap comment-wrap">`` in *soup* is treated as
    one review. Inside it, the ``<b>`` of the ``user-comment`` div supplies
    the condition, its ``<span>`` the review text, and the ``rating-score``
    div the rating.

    Args:
        soup: A parsed document exposing ``find_all`` (e.g. a BeautifulSoup
            object).

    Returns:
        A list of dicts with the keys ``'Condition'``, ``'Review'`` and
        ``'Rating'`` (all strings), in document order. Blocks without a
        condition are skipped; a missing review text or rating is recorded
        as an empty string.
    """
    reviews = []
    for block in soup.find_all('div', {'class': 'block-wrap comment-wrap'}):
        comment = block.find('div', {'class': 'user-comment'})
        condition_tag = comment.b if comment is not None else None
        if condition_tag is None:
            # Without a condition the entry is not usable; skip it.
            continue

        # Look each optional field up independently so that one missing
        # element neither blanks the other nor leaks a value left over
        # from the previous review.
        review_tag = comment.span
        rating_tag = block.find('div', {'class': 'rating-score'})

        reviews.append({
            'Condition': condition_tag.get_text(),
            'Review': review_tag.get_text() if review_tag is not None else '',
            'Rating': rating_tag.get_text() if rating_tag is not None else '',
        })
    return reviews
import pandas as pd

# Download listing pages 1 through 49 (range's upper bound is exclusive),
# turn the reviews of each page into a DataFrame, and stack them into a
# single frame with a fresh 0..N-1 index.
page_frames = []
for page in range(1, 50):
    # A timeout keeps one stalled connection from hanging the whole run.
    text = requests.get(ChangingURL(page), timeout=30).text
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to avoid bs4's warning).
    soup = bf(text, 'html.parser')
    page_frames.append(pd.DataFrame(get_review(soup)))
appended_data = pd.concat(page_frames, axis=0).reset_index(drop=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment