Skip to content

Instantly share code, notes, and snippets.

@nukeop nukeop/scrape.py
Created Nov 4, 2017

Embed
What would you like to do?
import re
import urllib.request
from urllib.parse import urljoin, urlsplit

import requests
import tqdm
from bs4 import BeautifulSoup, SoupStrainer
# Root of the wiki; relative hrefs found on its pages are resolved against it.
BASE_URL = 'http://mightandmagic.wikia.com'
# Category page that links to one "File:Hero*.png" page per hero icon.
CATEGORY_URL = BASE_URL + '/wiki/Category:Heroes_V_hero_icons'

# Compiled once so the patterns are not rebuilt for every page in the loop.
FILE_PAGE_PATTERN = re.compile(r'.*File:Hero.*png')
ORIGINAL_IMAGE_PATTERN = re.compile(r'.*vignette.*png.*format=original.*')

# Seconds to wait on a page request; without a timeout requests.get can
# block indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30


def fetch_soup(url):
    """Download *url* and return its HTML parsed with the lxml parser.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def image_filename(url):
    """Return the local file name to use for the image at *url*.

    The first path segment ending in ``.png`` (case-insensitive) is used,
    so the result does not depend on how deep the image sits in the URL.
    If no segment ends in ``.png`` the last path segment is returned.

    Raises:
        ValueError: if the URL has no path segments at all.
    """
    segments = [part for part in urlsplit(url).path.split('/') if part]
    if not segments:
        raise ValueError('no file name in URL: {!r}'.format(url))
    for segment in segments:
        if segment.lower().endswith('.png'):
            return segment
    return segments[-1]


def main():
    """Download every Heroes V hero icon into the current directory."""
    category = fetch_soup(CATEGORY_URL)
    anchors = category.find_all('a', href=FILE_PAGE_PATTERN)
    # urljoin copes with both relative and already-absolute hrefs; the set
    # drops duplicate links and sorting makes the download order repeatable.
    file_pages = sorted({urljoin(BASE_URL, a.get('href')) for a in anchors})

    for file_page in tqdm.tqdm(file_pages):
        anchor = fetch_soup(file_page).find('a', href=ORIGINAL_IMAGE_PATTERN)
        if anchor is None:
            # A page without an original-size link is skipped instead of
            # aborting the whole run; tqdm.write keeps the bar intact.
            tqdm.tqdm.write('no original image link on ' + file_page)
            continue
        image_url = anchor.get('href')
        urllib.request.urlretrieve(image_url, image_filename(image_url))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.