Skip to content

Instantly share code, notes, and snippets.

@smithaam
Last active August 30, 2017 00:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save smithaam/3e2b51321b8342aca68b3b779a51e750 to your computer and use it in GitHub Desktop.
Save smithaam/3e2b51321b8342aca68b3b779a51e750 to your computer and use it in GitHub Desktop.
import time
from selenium import webdriver
# Shared Selenium browser handle. It stays None until init() assigns the
# Chrome driver to it.
driver = None
# Module-level placeholder. NOTE(review): extractData binds its own local
# name `outFile` for the CSV file, so this global appears to stay None.
outFile = None
def init(chrome_path=r"C:\webdriver\chromedriver.exe"):
    """Start a Chrome browser and store it in the module-level ``driver``.

    Args:
        chrome_path: Path to the chromedriver executable. The default is the
            Windows location that used to be hard-coded, so a bare ``init()``
            call behaves exactly as before.
    """
    # The other functions in this file read the browser through the global.
    global driver
    driver = webdriver.Chrome(chrome_path)
def openPage(location, speciality, pageNum):
    """Navigate the shared browser to one page of vitals.com doctor results.

    ``location`` is sent as the ``keyword`` query parameter, ``speciality`` as
    ``specialty`` and ``pageNum`` as ``page``. The values are concatenated
    into the URL as given; no URL escaping is applied here.
    """
    searchUrl = (
        "http://www.vitals.com/search?keyword=" + location
        + "&display_type=Doctor&specialty=" + speciality
        + "&page=" + format(pageNum)
    )
    driver.get(searchUrl)
    # Fixed two-second pause after navigation; presumably gives the result
    # cards time to render before they are read.
    time.sleep(2)
def _cardText(card, className, flatten=True):
    """Join the text of every ``className`` element inside ``card`` with ', '.

    When ``flatten`` is true, newlines inside each element's text are
    replaced by single spaces.
    """
    texts = [str(element.text)
             for element in card.find_elements_by_class_name(className)]
    if flatten:
        texts = [text.replace("\n", " ") for text in texts]
    return ', '.join(texts)


def _csvField(value):
    """Return ``value`` as a double-quoted CSV field with embedded quotes doubled."""
    return "\"" + value.replace("\"", "\"\"") + "\""


def extractData(location, speciality):
    """Append one CSV row per result card on the current page.

    Reads every ``card-animation`` element from the shared ``driver`` and
    writes, for each card, the columns: speciality, name, meta, address,
    rating count, rating. Each row is also printed. Rows are appended to
    ``data-<location>.csv`` in the current working directory.

    Args:
        location: Used only to build the output file name.
        speciality: Speciality label as used in the URL; ``%20`` sequences
            are turned back into spaces for the CSV column.

    Returns:
        The number of cards found on the page (0 when there are none, in
        which case the output file is not opened).
    """
    cards = driver.find_elements_by_class_name("card-animation")
    if not cards:
        return 0

    specialityName = speciality.replace("%20", " ")
    # Open the file once per page rather than once per card. `outFile` here
    # is a local name and does not touch the module-level variable.
    with open("data-" + location + ".csv", "a") as outFile:
        for card in cards:
            # The name column keeps any newlines, as before; the other
            # columns have newlines flattened to spaces.
            drName = _cardText(card, "resultsCard__header__name", flatten=False)
            drMeta = _cardText(card, "resultsCard__header__meta")
            drRatingCount = _cardText(card, "resultsCard__rating__ratingCount")
            drRating = _cardText(card, "number")
            drAddress = _cardText(card, "resultsCard__address")
            columns = (specialityName, drName, drMeta,
                       drAddress, drRatingCount, drRating)
            # Build the whole row in one expression so all six columns are
            # included, and double embedded quotes so the row stays parseable.
            csvLine = ",".join(_csvField(column) for column in columns)
            print(csvLine)
            outFile.write(csvLine + "\n")
    return len(cards)
# Speciality names to scrape, already percent-encoded (%20 for spaces) so
# they can be concatenated straight into the search URL.
specialities = [
    "Allergy%20and%20Immunology",
    "Colon%20and%20Rectal%20Surgery",
    "Dermatology",
    "Family%20Medicine",
    "General%20Practice",
    "Internal%20Medicine",
    "Medical%20Genetics",
    "Neurological%20Surgery",
    "Sports%20Medicine",
    "Obstetrics%20and%20Gynecology",
    "Ophthalmology",
    "Oral%20and%20Maxillofacial%20Surgery",
    "Orthopedic%20Surgery",
    "Otolaryngology",
    "Pediatrics",
    "Physical%20Medicine%20and%20Rehabilitation",
    "Plastic%20Surgery",
    "Anesthesiology",
    "General%20Surgery",
    "Thoracic%20Surgery",
    "Urology",
    "Geriatric%20Medicine",
    "Cardiology",
    "Endocrinology,%20Diabetes%20and%20Metabolism",
    "Gastroenterology",
    "Infectious%20Disease",
    "Nephrology",
    "Neurology",
    "Hematology%20and%20Oncology",
    "Child%20Psychiatry",
    "Pediatric%20Surgery",
    "Psychiatry",
    "Pulmonary%20Disease",
    "Pain%20Management",
    "Preventive%20Medicine",
    "Bariatric%20Medicine",
    "Critical%20Care%20Medicine",
    "Addiction%20Medicine",
    "Adolescent%20Medicine",
    "Emergency%20Medicine",
    "Hematology",
    "Hepatology",
    "Hospice%20and%20Palliative%20Medicine",
    "Nuclear%20Medicine",
    "Osteopathic%20Manipulative%20Medicine",
    "Pathology",
    "Radiology",
    "Rheumatology",
    "Sleep%20Medicine",
    "Hospitalist",
    "Radiation%20Oncology",
    "Primary%20Care",
]
# Script body: for one location, walk every speciality and page through the
# search results until a page yields no cards.
init()
location = "boston"
try:
    for speciality in specialities:
        page = 0
        # Sentinel so the first page of each speciality is always fetched.
        count = -1
        # NOTE(review): this stops only when a page has zero cards; it
        # assumes the site returns an empty page past the last one -- confirm.
        while count != 0:
            openPage(location, speciality, page)
            count = extractData(location, speciality)
            page = page + 1
finally:
    # Close the browser started by init(), even if scraping fails part-way,
    # so no Chrome/chromedriver process is left running.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment