Last active
June 14, 2021 18:11
-
-
Save natalyjazzviolin/d4f228822ab8b9abb21b47362ce0ad35 to your computer and use it in GitHub Desktop.
Scraping emails from a known database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup | |
import requests | |
import pandas as pd | |
from time import sleep | |
import json | |
import csv | |
# Pages to scrape once the session is logged in.
update = [
    ' DATA URL ',  #Changed due to privacy concerns.
]

# Pause between consecutive page requests (presumably to go easy on the site).
PAGE_DELAY_SECONDS = 10
# Upper bound for a single HTTP request; without one, requests may wait forever.
REQUEST_TIMEOUT_SECONDS = 30

# Maps each output field to the index of the table cell it is read from.
CELL_INDEX_BY_FIELD = {
    'fullName': 0,
    'email': 2,
    'memberType': 6,
    'status': 7,
}


def _parse_user_rows(html):
    """Return one dict per table row in *html* that has every required cell.

    Rows with too few ``<td>`` cells (for example header rows made of
    ``<th>`` cells) are skipped instead of raising ``IndexError``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    highest_index = max(CELL_INDEX_BY_FIELD.values())
    rows = []
    for table_row in soup.select("table tr"):
        cells = table_row.find_all('td')
        if len(cells) <= highest_index:
            continue
        rows.append({
            field: cells[index].text.strip()
            for field, index in CELL_INDEX_BY_FIELD.items()
        })
    return rows


def _write_csv(rows, path):
    """Write *rows* (a list of dicts) to *path* with a header line.

    Nothing but an empty file is produced when *rows* is empty.
    """
    # newline='' is required by the csv module to avoid blank lines on Windows.
    with open(path, 'w', newline='', encoding='utf-8') as data_file:
        if not rows:
            return
        writer = csv.DictWriter(data_file, fieldnames=list(rows[0]))
        writer.writeheader()
        writer.writerows(rows)


userList = []
try:
    # One session is reused for every request so the login carries over to
    # the data pages; the ``with`` block closes it when scraping is finished.
    with requests.Session() as session:
        # Create the payload
        payload = {
            'username': 'USERNAME',  #Changed due to privacy concerns.
            'password': 'PASSWORD',  #Changed due to privacy concerns.
        }
        # Post the payload to the site to log in
        s = session.post(
            " LOGIN URL ",  #Changed due to privacy concerns.
            data=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        # Fail loudly on an HTTP error instead of scraping an error page.
        s.raise_for_status()

        for page_number, page in enumerate(update):
            # Wait between pages only; no pause before the first request
            # or after the last one.
            if page_number:
                sleep(PAGE_DELAY_SECONDS)
            s = session.get(page, timeout=REQUEST_TIMEOUT_SECONDS)
            s.raise_for_status()
            userList.extend(_parse_user_rows(s.content))

    # Write data into json file
    with open('userData.json', 'w', encoding='utf-8') as f:
        json.dump(userList, f)

    # Write the same data into a CSV file
    _write_csv(userList, 'userData.csv')

    print(userList)
except Exception as err:
    # Top-level boundary: report the cause rather than hiding it. Unlike a
    # bare ``except:``, this lets Ctrl-C (KeyboardInterrupt) and SystemExit
    # through.
    print(f"An error occurred: {err!r}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment