@natalyjazzviolin
Last active June 14, 2021 18:11
Scraping emails from a known database.
from bs4 import BeautifulSoup
import requests
import pandas as pd
from time import sleep
import json
import csv

update = [
    ' DATA URL ',  # Changed due to privacy concerns.
]

# Start the session so the login cookie is reused across requests
session = requests.Session()

try:
    # Create the login payload
    payload = {
        'username': 'USERNAME',  # Changed due to privacy concerns.
        'password': 'PASSWORD'   # Changed due to privacy concerns.
    }
    # Post the payload to the site to log in
    s = session.post(" LOGIN URL ", data=payload)  # Changed due to privacy concerns.

    userList = []
    for page in update:
        # Navigate to the next page and scrape the data
        s = session.get(page)
        pageCount = 1
        # Create a new BeautifulSoup object from the page HTML
        soup = BeautifulSoup(s.content, 'html.parser')
        for table_row in soup.select("table tr"):
            # Find each cell in the row; header rows have no <td> cells
            cells = table_row.find_all('td')
            d = dict()
            # Data rows need at least 8 cells for the columns read below
            if len(cells) >= 8:
                d['fullName'] = cells[0].text.strip()
                d['email'] = cells[2].text.strip()
                d['memberType'] = cells[6].text.strip()
                d['status'] = cells[7].text.strip()
                userList.append(d)
        # Pause between pages to avoid hammering the server
        sleep(10)

    # Write the data into a JSON file
    with open('userData.json', 'w') as f:
        json.dump(userList, f)

    # Open the CSV file for writing (newline='' avoids blank lines on Windows)
    dataFile = open('userData.csv', 'w', newline='')
    # Create the CSV writer object
    csvWriter = csv.writer(dataFile)
    # Counter variable used for writing the header row only once
    count = 0
    for d in userList:
        if count == 0:
            # Write the header row of the CSV file
            header = d.keys()
            csvWriter.writerow(header)
            count += 1
        # Write one row of data to the CSV file
        csvWriter.writerow(d.values())
    dataFile.close()

    print(userList)
except Exception as e:
    print(f"An error occurred: {e}")