Skip to content

Instantly share code, notes, and snippets.

@harishsg99
Created April 30, 2021 06:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save harishsg99/3c5ce07908b6b4a601c7f4a543341066 to your computer and use it in GitHub Desktop.
Save harishsg99/3c5ce07908b6b4a601c7f4a543341066 to your computer and use it in GitHub Desktop.
Script to scrape the TN government COVID bed-availability website (stopcorona.tn.gov.in) into CSV
from autoscraper import AutoScraper
import pandas as pd
import json
import csv
# Page to scrape and sample cell values from it; AutoScraper uses the samples
# to learn extraction rules for "similar" elements on the page.
url = 'https://stopcorona.tn.gov.in/beds.php'
wanted_list = ["Ariyalur","District","COVID BEDS","Sai Fertility Centre & Hospital, Vedhachalam Nagar","9941550979","8"]


def grouped_to_frame(grouped):
    """Turn a mapping of ``name -> list of values`` into a DataFrame.

    Each key becomes one column. Lists of different lengths are allowed:
    shorter columns are padded with missing values so that every value is
    kept instead of raising or being truncated.

    Args:
        grouped: Mapping from a column name to the list of values for it.

    Returns:
        A ``pandas.DataFrame`` with one column per key, in insertion order.
    """
    # Wrapping each list in a Series lets pandas align columns of unequal
    # length; dtype=object keeps scraped strings (e.g. phone numbers)
    # untouched and avoids dtype guessing on empty lists.
    return pd.DataFrame(
        {name: pd.Series(values, dtype=object) for name, values in grouped.items()}
    )


def main():
    """Scrape the bed-availability page and write JSON and CSV outputs.

    Files written to the current directory:
        sample.json            raw grouped scrape result
        data_file.csv          one column per extraction rule, no index
        scrapedcovid_data.csv  same table after duplicate removal, with index
    """
    scraper = AutoScraper()
    scraper.build(url, wanted_list)

    # NOTE(review): with grouped=True AutoScraper is documented to return a
    # dict mapping rule ids to lists of matched values -- confirm against the
    # installed version. There is no 'emp_details' key in that result, so the
    # previous lookup of it could only raise KeyError.
    grouped = scraper.get_result_similar(url, grouped=True)

    json_object = json.dumps(grouped)
    print(json_object)
    with open("sample.json", "w") as outfile:
        outfile.write(json_object)

    frame = grouped_to_frame(grouped)
    frame.to_csv('data_file.csv', index=False)

    # NOTE(review): keep=False removes *every* copy of a repeated row, not
    # just the extras. Kept as in the original; switch to keep='first' if one
    # copy of each duplicated row should survive.
    deduped = frame.drop_duplicates(keep=False)
    deduped.to_csv('scrapedcovid_data.csv', index=True)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment