Skip to content

Instantly share code, notes, and snippets.

@ruchej
Last active July 15, 2022 13:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ruchej/21bdcf81e899a61cdd3362c017d34557 to your computer and use it in GitHub Desktop.
Save ruchej/21bdcf81e899a61cdd3362c017d34557 to your computer and use it in GitHub Desktop.
def parsing_one_group(url: str) -> Dict[str, list[str]]:
    '''
    Parse one letter group of a category listing.

    Downloads the page at ``url``, takes the first ``mw-category-group``
    block inside the ``mw-pages`` element and reads its ``h3`` heading as
    the letter. The "Следующая страница" (next page) link is then followed
    for as long as the first group of each page carries the same heading,
    and the text of every ``li`` item of that group is collected.

    Args:
        url: Address of the listing page to start from.

    Returns:
        A one-key dictionary mapping the letter to the list of collected
        item texts, e.g. ``{"А": ["Аардоникс", ...]}``.
    '''
    # Local import keeps the block self-contained without touching the
    # file-level import section.
    from urllib.parse import urljoin

    page = requests.get(url).text
    soup = BeautifulSoup(page, 'lxml')
    group = soup.find(id='mw-pages').find('div', class_='mw-category-group')
    letter = group.find('h3').text
    animals = {letter: []}
    while letter == group.find('h3').text:
        print(f'Парсим категорию {letter}')
        animals[letter].extend(
            animal.text for animal in group.find('ul').find_all('li')
        )
        link_next_page_obj = soup.find('a', text='Следующая страница')
        if not link_next_page_obj:
            # No further page: stop here. Without this the loop would keep
            # re-reading the same group forever, because neither ``soup``
            # nor ``group`` changes any more.
            break
        next_page = link_next_page_obj.get('href')
        # urljoin resolves the href against the site root, so a
        # root-relative href does not produce a double slash.
        url = urljoin('https://ru.wikipedia.org/', next_page)
        page = requests.get(url).text
        soup = BeautifulSoup(page, 'lxml')
        group = soup.find(id='mw-pages').find('div', class_='mw-category-group')
    return animals
def get_animals(url: str, abc: set) -> Dict[str, list[str]]:
    '''
    Collect the animals for every letter of the alphabet ``abc``.

    Each link produced by ``gen_urls_abc(url, abc)`` is parsed with
    ``parsing_one_group`` and the per-letter results are merged into a
    single dictionary. If two links yield the same letter, the later
    result replaces the earlier one.

    Args:
        url: Passed through to ``gen_urls_abc`` (presumably the base
            listing address; confirm against that helper).
        abc: Set of letters passed through to ``gen_urls_abc``.

    Returns:
        A dictionary mapping each parsed letter to its list of names.
    '''
    animals: Dict[str, list[str]] = {}
    for link in gen_urls_abc(url, abc):
        animals.update(parsing_one_group(link))
    return animals
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment