Skip to content

Instantly share code, notes, and snippets.

@sash13
Last active August 28, 2023 18:01
Show Gist options
  • Save sash13/ec660294732f7cce92ccb5db426ffdf5 to your computer and use it in GitHub Desktop.
Save sash13/ec660294732f7cce92ccb5db426ffdf5 to your computer and use it in GitHub Desktop.
Mining names from petition.president.gov.ua
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import sys
import re
import requests
# URL template for a petition's HTML page; the placeholder takes the petition number.
url_main = 'https://petition.president.gov.ua/petition/{}'
# URL template for one page of a petition's votes, served as JSON; the placeholders
# take the petition number and the page number, in that order.
url_json = 'https://petition.president.gov.ua/petition/{}/votes/{}/json'
def get_pages_count(data):
    """Return the highest page number referenced by ``get_voters_page('<n>')``.

    Args:
        data: Text (HTML) to scan for ``get_voters_page('<n>')`` occurrences.

    Returns:
        The largest ``n`` found as an int, or 1 when no occurrence is present.
    """
    page_numbers = re.findall(r"get_voters_page\('(\d+)'\)", data)
    # ``default`` covers the "no matches" case explicitly instead of relying on
    # a bare ``except`` that would also hide unrelated errors.
    return max(map(int, page_numbers), default=1)
def get_names(data):
    """Extract the text of every name cell from a votes table.

    Args:
        data: HTML markup expected to contain a ``<div class="table">`` whose
            descendants include ``<div class="table_cell name">`` elements.

    Returns:
        A list with the text of each name cell, in document order. Empty when
        the markup has no ``<div class="table">`` at all.
    """
    soup = BeautifulSoup(data, 'html.parser')
    table = soup.find("div", {"class": "table"})
    if table is None:
        # No votes table in this markup: report "no names" rather than
        # failing with AttributeError on None.
        return []
    name_cells = table.find_all("div", {"class": "table_cell name"})
    return [cell.text for cell in name_cells]
def get_vote_page(number, page=1):
    """Fetch one page of votes for a petition and return the names on it.

    Args:
        number: Petition number substituted into the JSON votes URL.
        page: Page number of the votes list to fetch (defaults to 1).

    Returns:
        The list of names parsed from the ``table_html`` field of the JSON
        response.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-success HTTP status.
    """
    request_url = url_json.format(number, page)
    # Without a timeout a stalled connection would block the script forever.
    res = requests.get(request_url, timeout=30)
    # Fail with a clear HTTP error instead of a confusing JSON decode error.
    res.raise_for_status()
    payload = res.json()
    return get_names(payload['table_html'])
def get_main_page(number):
    """Collect the names from every votes page of a petition.

    Names on the petition's main HTML page are read first; the remaining pages
    (2..N, where N comes from ``get_pages_count``) are fetched one by one via
    ``get_vote_page``.

    Args:
        number: Petition number substituted into the petition URL.

    Returns:
        A list of all names gathered, first page first.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-success HTTP status.
    """
    request_url = url_main.format(number)
    # Without a timeout a stalled connection would block the script forever.
    res = requests.get(request_url, timeout=30)
    res.raise_for_status()
    data = res.content.decode('utf-8')

    peoples = get_names(data)
    pages_count = get_pages_count(data)
    # range() is empty when there is only one page, so no extra guard is needed.
    for page_number in range(2, pages_count + 1):
        peoples += get_vote_page(number, page_number)
        # Running total, printed as a progress indicator.
        print(len(peoples))
    return peoples
if __name__ == '__main__':
    # The petition number is the first command-line argument; check for it
    # explicitly rather than catching every exception with a bare ``except``.
    if len(sys.argv) < 2:
        sys.exit('Using: python script.py <number>')
    number = sys.argv[1]

    itemlist = get_main_page(number)
    # Write one name per line to "<number>_petition.txt" in UTF-8.
    with open(number + '_petition.txt', 'w', encoding='utf-8') as outfile:
        outfile.write("\n".join(itemlist))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment