Skip to content

Instantly share code, notes, and snippets.

@desulaid
Created May 29, 2019 12:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save desulaid/1036fcfa9adece9a2c5c0e126f2fa34c to your computer and use it in GitHub Desktop.
Save desulaid/1036fcfa9adece9a2c5c0e126f2fa34c to your computer and use it in GitHub Desktop.
from requests import get
from lxml import html
from openpyxl.workbook import Workbook
from openpyxl.styles import Font
from re import findall
# Site root: the listing-page and member-page URLs are built from it, and the
# name of the saved workbook is derived from it.
BASE_URL = 'https://kiportal.ru'

# First and last registry listing page to scrape (both inclusive).
PAGE_START = 20
PAGE_FINISH = 21

# Output workbook; the scraped members go to its active sheet.
wb = Workbook()
sheet = wb.active
sheet.title = 'Реестр членов СРО'

# Column captions (in Russian) for the members sheet. In the visible code only
# their count is used: the loop that would write them into the sheet has its
# cell-writing lines commented out.
wb_headers = [
    'Фамилия',
    'Имя',
    'Отчество',
    '№ в реестре СРО',
    '№ Протокола приема',
    'Дата протокола приема',
    '№ аттестата КИ',
    'Дата выдачи аттестата',
    'Реестровый номер в ГРКИ',
    'Сведения об образовании',
    'СНИЛС',
    'ИНН',
    'Почтовый адрес для связи с КИ',
    'Рабочий тел.',
    'Мобильный тел.',
    'e-mail для связи',
]

# Sheet row that receives the next scraped member. Data starts at row 3; rows 1
# and 2 are not written by the visible code (presumably reserved for captions).
user_row = 3
# Placeholder for the caption row. The disabled lines below would put each
# entry of wb_headers into row 1 (columns A, B, ...) in bold; with them
# commented out the loop intentionally does nothing. The body is indented so
# that `pass` really belongs to the loop and the file parses.
for i in range(ord('A'), ord('A') + len(wb_headers)):
    # sheet[f'{chr(i)}1'] = wb_headers[i-65]
    # sheet[f'{chr(i)}1'].font = Font(bold=True)
    pass
# Seconds to wait for the server on each request, so that a stalled connection
# raises instead of hanging the whole run.
REQUEST_TIMEOUT = 30

# Walk the registry listing pages PAGE_START..PAGE_FINISH (inclusive), open
# every member page linked from a listing table and write one sheet row per
# member, starting at row `user_row`.
for page in range(PAGE_START, PAGE_FINISH + 1):
    url = f'{BASE_URL}/reestr/reestry-sro/reestr-chlenov/{page}.html'
    listing_root = html.fromstring(get(url, timeout=REQUEST_TIMEOUT).text)
    # xpath() already returns a list of href strings; no copy is needed.
    links = listing_root.xpath('//tbody//tr//td//a//@href')

    for person in links:
        person_doc = get(f'{BASE_URL}{person}', timeout=REQUEST_TIMEOUT).text
        root = html.fromstring(person_doc)

        # Skip members whose status paragraph reads 'Исключен' (excluded).
        # xpath() yields a list of text nodes, so each node is tested; comparing
        # the list itself with a string is always False and skipped nobody.
        # NOTE(review): assumes the status text is exactly this word once
        # surrounding whitespace is stripped - confirm against a live page.
        status_texts = root.xpath(
            '//div[@class="element element-text first"]//p//text()'
        )
        if any(text.strip() == 'Исключен' for text in status_texts):
            continue

        # A page with no label cells in the registry tab has nothing to export.
        # (A list is never None, so the emptiness check is what actually guards.)
        labels = root.xpath('*//div[@id="tab-reestr"]//table//tr//td[1]//text()')
        if not labels:
            continue

        values = root.xpath('*//div[@id="tab-reestr"]//table//tr//td[2]//text()')
        # Word tokens of the page title; presumably surname, first name and
        # patronymic - verify that the title holds nothing else.
        name = findall(r'\w+', ''.join(root.xpath('//title//text()')))

        # Row layout: name tokens, then value cells 3..7 and value cell 10.
        row = name + values[3:8] + values[10:11]
        for column, info in enumerate(row, start=1):
            sheet.cell(user_row, column, str(info))
        user_row += 1

# Save next to the script, named after the site host (URL without its scheme),
# e.g. './kiportal.ru.xlsx'.
host = BASE_URL.split('://', 1)[-1]
wb.save(f'./{host}.xlsx')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment