Skip to content

Instantly share code, notes, and snippets.

@uvegla
Created August 16, 2020 08:14
Show Gist options
  • Save uvegla/8a9e01f02b7c2845986f13061b7355d3 to your computer and use it in GitHub Desktop.
Save uvegla/8a9e01f02b7c2845986f13061b7355d3 to your computer and use it in GitHub Desktop.
MTG set playability scraper for EDH
#!/usr/bin/env python
import io
import json
import re
import zipfile
from pathlib import Path
from typing import Iterator, Set, List, Dict
import click
import requests
from slugify import slugify
# EDHREC slugs of the basic lands, plain and snow-covered; main() skips any
# card whose slug appears in this list.
_BASIC_LAND_SLUGS = ['island', 'forest', 'swamp', 'mountain', 'plains']
FILTERED_CARDS = _BASIC_LAND_SLUGS + [
    f'snow-covered-{land}' for land in _BASIC_LAND_SLUGS
]
@click.command()
@click.option('-s', '--set-code', type=click.STRING, required=True)
def main(set_code: str):
    """Write EDHREC usage data for every card of a set to out/<SET>.csv.

    The set file is fetched first, then one tab-separated row is written per
    card. Cards listed in FILTERED_CARDS are skipped, and cards without
    EDHREC data are reported on the console and left out of the file.
    """
    set_code = set_code.upper()
    fetch_set(set_code)

    out_root = Path('out')
    # The report directory does not exist on a fresh checkout; without this
    # the open() below fails before a single card is processed.
    out_root.mkdir(parents=True, exist_ok=True)

    # parse_set() returns a set, whose iteration order changes between runs;
    # sort by name so the report is reproducible.
    cards = sorted(parse_set(set_code).cards, key=lambda card: card.name)

    # Explicit UTF-8 so card names with non-ASCII characters can always be
    # written, whatever the platform default encoding is.
    with out_root.joinpath(f'{set_code}.csv').open('w', encoding='utf-8') as out:
        for card in cards:
            if card.edhrec_slug in FILTERED_CARDS:
                continue
            try:
                click.echo(f'{card.edhrec_slug}')
                edhrec_fetch_card_data(card.edhrec_slug)
                edhrec_data = parse_edhrec_data(card.edhrec_slug)
                click.echo(f'{edhrec_data.decks} {edhrec_data.percentage}% {edhrec_data.all_decks}')
                row = [
                    card.name,
                    f'https://edhrec.com/cards/{card.edhrec_slug}',
                    str(edhrec_data.decks),
                    str(edhrec_data.percentage),
                    str(edhrec_data.all_decks),
                    str(edhrec_data.real_color_identity),
                    str(edhrec_data.simplified_color_identity),
                    str(edhrec_data.tcgplayer_price_trend),
                    str(edhrec_data.tcgplayer_url),
                    card.rarity,
                    ','.join(card.printings),
                ]
                out.write('\t'.join(row) + '\n')
            except FileNotFoundError as e:
                # Raised when EDHREC has no data for the slug (or the cached
                # file cannot be opened); report it and carry on.
                click.echo(f'Failed to get data on {card.edhrec_slug}: {str(e)}')
def fetch_set(set_code: str):
    """Download and unpack the MTGJSON file of a set unless it is cached.

    The archive ``<set_code>.json.zip`` is fetched from mtgjson.com and
    extracted into the local ``sets`` directory. If ``sets/<set_code>.json``
    already exists, nothing is downloaded and a notice is printed instead.

    Args:
        set_code: Set code used verbatim in the download URL and file name.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded payload is not a zip archive.
    """
    root_path = Path('sets')
    set_path = root_path.joinpath(f'{set_code}.json')

    if set_path.exists():
        click.echo(f'The set file already exists at: {set_path.absolute()}')
        return

    # The whole body is needed in memory for ZipFile, so streaming buys
    # nothing; a timeout keeps a stalled connection from hanging the run.
    r = requests.get(f'https://www.mtgjson.com/json/{set_code}.json.zip', timeout=30)
    # Fail with a clear HTTP error (e.g. unknown set code) instead of a
    # confusing BadZipFile raised on an HTML error page.
    r.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(str(root_path))
class Card:
    """A card of a set, identified by its name.

    Two cards are equal, and hash alike, when their names match; rarity and
    printings do not take part in the comparison.
    """

    def __init__(self, name: str, rarity: str, printings: List[str]):
        self.name = name
        self.rarity = rarity
        self.printings = printings
        # Slug derived from the name via edhrec_slugify().
        self.edhrec_slug = edhrec_slugify(self.name)

    def __repr__(self):
        return self.name

    def __str__(self):
        return self.name

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # isinstance() keeps equality consistent with __hash__ for subclasses:
        # an exact type check made two subclass instances with the same name
        # hash alike yet compare unequal.
        if isinstance(other, Card):
            return self.name == other.name
        # Let Python try the other operand / fall back to identity instead of
        # claiming a definitive answer for unrelated types.
        return NotImplemented
def iterate_cards_in_set(set_code: str) -> Iterator[Card]:
    """Yield a Card for every entry under ``cards`` in ``sets/<set_code>.json``.

    Name selection per card entry:
      * layout ``adventure`` or ``flip`` with a ``names`` list: the first name;
      * any other layout with a ``names`` list: all names joined by `` // ``;
      * no (or empty) ``names``: the entry's ``name``.

    Rarity defaults to ``unknown`` and is title-cased; printings default to an
    empty list.

    Args:
        set_code: Set code used to locate the JSON file.

    Raises:
        FileNotFoundError: If the set file does not exist.
        KeyError: If an entry has neither ``names`` nor ``name``.
    """
    # Close the file deterministically and read it as UTF-8 (the JSON
    # interchange encoding) rather than the platform default.
    with Path(f'sets/{set_code}.json').open('r', encoding='utf-8') as handler:
        set_content = json.load(handler)

    for card in set_content.get('cards', []):
        # ``names`` may be absent, null or empty; treat all three alike so an
        # adventure/flip entry without it falls back to ``name`` instead of
        # failing on ``None[0]``.
        names = card.get('names') or []
        if names and card.get('layout') in ('adventure', 'flip'):
            name = names[0]
        elif names:
            name = ' // '.join(names)
        else:
            name = card['name']
        yield Card(name, card.get('rarity', 'unknown').title(), card.get('printings', []))
class MtgSet:
    """Container for the distinct cards of one set."""

    def __init__(self, cards: Set[Card]):
        # Stored as given; no copy is made.
        self.cards = cards
def parse_set(set_code: str) -> MtgSet:
    """Collect every card yielded for ``set_code`` into a de-duplicated MtgSet."""
    unique_cards = set(iterate_cards_in_set(set_code))
    return MtgSet(unique_cards)
def edhrec_slugify(card_name: str) -> str:
    """Return the slug of ``card_name``: apostrophes are removed, then the
    remainder is passed through ``slugify``."""
    without_apostrophes = ''.join(card_name.split("'"))
    return slugify(without_apostrophes)
def edhrec_fetch_card_data(slug: str):
    """Download the EDHREC JSON of a card into ``edhrec/<slug>.json``.

    Nothing is downloaded when the file is already present.

    Args:
        slug: Card slug used in both the request URL and the file name.

    Raises:
        FileNotFoundError: If EDHREC does not answer with HTTP 200.
    """
    root_path = Path('edhrec')
    file_path = root_path.joinpath(f'{slug}.json')

    if file_path.exists():
        return

    # A timeout keeps one stalled request from hanging the whole run.
    response = requests.get(
        f'https://edhrec-json.s3.amazonaws.com/en/cards/{slug}.json',
        timeout=30,
    )
    if response.status_code != 200:
        # Name the card and status: the previous message ended in a bare
        # colon and carried no information.
        raise FileNotFoundError(
            f'Card data not found on EDHREC: {slug} (HTTP {response.status_code})'
        )

    # Create the cache directory on first use; otherwise open() raises
    # FileNotFoundError, which the caller reports as missing EDHREC data.
    root_path.mkdir(parents=True, exist_ok=True)
    with file_path.open('wb') as handler:
        handler.write(response.content)
class EdhrecData:
    """Deck statistics, color identity and vendor prices of a single card.

    ``prices`` maps a vendor key (``cardmarket``, ``tcgplayer``) to a dict
    that may carry ``price`` and ``url`` entries.
    """

    def __init__(
            self,
            decks: int,
            percentage: int,
            all_decks: int,
            color_identity: List[str],
            prices: Dict[str, Dict]
    ):
        self.decks = decks
        self.percentage = percentage
        self.all_decks = all_decks
        self.color_identity = color_identity
        self.prices = prices

    @staticmethod
    def unused() -> 'EdhrecData':
        """Return an instance with zero counts, no colors and no prices."""
        return EdhrecData(0, 0, 0, [], {})

    def _vendor_value(self, vendor, field, fallback):
        """Look up ``field`` in the ``vendor`` price entry.

        ``fallback`` is returned when the vendor entry is missing or falsy, or
        when it lacks ``field``.
        """
        entry = self.prices.get(vendor, {})
        if not entry:
            return fallback
        return entry.get(field, fallback)

    @property
    def real_color_identity(self):
        """All color identity entries concatenated into one string."""
        return ''.join(self.color_identity)

    @property
    def simplified_color_identity(self):
        """``colorless``, the single color, or ``multicolor``."""
        colors = self.color_identity
        if not colors:
            return 'colorless'
        if len(colors) == 1:
            return colors[0]
        return 'multicolor'

    @property
    def cardmarket_price_trend(self):
        """Cardmarket ``price`` value, or 0 when unavailable."""
        return self._vendor_value('cardmarket', 'price', 0)

    @property
    def cardmarket_url(self):
        """Cardmarket ``url`` value, or ``-`` when unavailable."""
        return self._vendor_value('cardmarket', 'url', '-')

    @property
    def tcgplayer_price_trend(self):
        """TCGplayer ``price`` value, or 0 when unavailable."""
        return self._vendor_value('tcgplayer', 'price', 0)

    @property
    def tcgplayer_url(self):
        """TCGplayer ``url`` value, or ``-`` when unavailable."""
        return self._vendor_value('tcgplayer', 'url', '-')
def parse_edhrec_data(slug: str) -> EdhrecData:
    """Parse the cached ``edhrec/<slug>.json`` file into an EdhrecData.

    The deck counts and percentage are extracted from the card panel's
    ``label`` text. When the panel or label is missing, or the label does not
    match the expected wording, ``EdhrecData.unused()`` is returned.

    Args:
        slug: Card slug naming the cached JSON file.

    Raises:
        FileNotFoundError: If the cached file does not exist.
    """
    root_path = Path('edhrec')
    file_path = root_path.joinpath(f'{slug}.json')

    with open(file_path, 'r', encoding='utf-8') as handler:
        data = json.load(handler)

    # Tolerate absent or null panels: one oddly shaped card file should yield
    # "unused" data rather than a KeyError that aborts the whole run.
    card_panel = (data.get('panels') or {}).get('card') or {}

    # Test the match explicitly instead of catching AttributeError, which
    # would also hide unrelated attribute errors raised inside the block.
    match = re.search(
        r'In ([0-9]+) decks\n([0-9]+)% of ([0-9]+) decks', card_panel.get('label') or ''
    )
    if match is None:
        return EdhrecData.unused()

    decks, percentage, all_decks = match.groups()
    return EdhrecData(
        int(decks),
        int(percentage),
        int(all_decks),
        card_panel.get('color_identity', []),
        card_panel.get('prices', {})
    )
# Run the click command only when the file is executed as a script.
if __name__ == '__main__':
    main()
beautifulsoup4==4.8.2
certifi==2019.11.28
chardet==3.0.4
Click==7.0
idna==2.9
python-slugify==4.0.1
requests==2.23.0
soupsieve==2.0
text-unidecode==1.3
urllib3==1.25.8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment