Created
August 16, 2020 08:14
-
-
Save uvegla/8a9e01f02b7c2845986f13061b7355d3 to your computer and use it in GitHub Desktop.
MTG set playability scraper for EDH
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import io | |
import json | |
import re | |
import zipfile | |
from pathlib import Path | |
from typing import Iterator, Set, List, Dict | |
import click | |
import requests | |
from slugify import slugify | |
# Slugs that main() skips: the five basic lands followed by their
# snow-covered variants, in the same order.
_BASIC_LAND_SLUGS = ('island', 'forest', 'swamp', 'mountain', 'plains')

FILTERED_CARDS = [
    *_BASIC_LAND_SLUGS,
    *(f'snow-covered-{slug}' for slug in _BASIC_LAND_SLUGS),
]
@click.command()
@click.option('-s', '--set-code', type=click.STRING, required=True)
def main(set_code: str):
    """Write one tab-separated line of EDHREC data per card of a set.

    The set code is upper-cased, the set file is fetched if needed, and the
    result is written to ``out/<SET_CODE>.csv``. Cards whose slug is listed in
    ``FILTERED_CARDS`` are skipped; cards for which no EDHREC data can be
    obtained are reported on the console and skipped.
    """
    set_code = set_code.upper()
    fetch_set(set_code)

    out_root = Path('out')
    # The output directory is not created anywhere else; without this the
    # open() below fails on a fresh checkout before any card is processed.
    out_root.mkdir(parents=True, exist_ok=True)

    # Explicit encoding so card names that are not plain ASCII do not depend
    # on the platform's default text encoding.
    with out_root.joinpath(f'{set_code}.csv').open('w', encoding='utf-8') as out:
        for card in parse_set(set_code).cards:
            if card.edhrec_slug in FILTERED_CARDS:
                continue

            click.echo(f'{card.edhrec_slug}')

            # Only these two calls can signal "no data" via FileNotFoundError,
            # so the try block is limited to them.
            try:
                edhrec_fetch_card_data(card.edhrec_slug)
                edhrec_data = parse_edhrec_data(card.edhrec_slug)
            except FileNotFoundError as e:
                click.echo(f'Failed to get data on {card.edhrec_slug}: {str(e)}')
                continue

            click.echo(f'{edhrec_data.decks} {edhrec_data.percentage}% {edhrec_data.all_decks}')

            columns = [
                card.name,
                f'https://edhrec.com/cards/{card.edhrec_slug}',
                str(edhrec_data.decks),
                str(edhrec_data.percentage),
                str(edhrec_data.all_decks),
                str(edhrec_data.real_color_identity),
                str(edhrec_data.simplified_color_identity),
                str(edhrec_data.tcgplayer_price_trend),
                str(edhrec_data.tcgplayer_url),
                card.rarity,
                ','.join(card.printings),
            ]
            out.write('\t'.join(columns) + '\n')
def fetch_set(set_code: str):
    """Download and unpack the set archive for ``set_code`` unless it is cached.

    When ``sets/<set_code>.json`` already exists only a notice is printed.
    Otherwise ``<set_code>.json.zip`` is requested from mtgjson.com and its
    contents are extracted into ``sets/``.

    Raises:
        requests.HTTPError: the server answered with an error status.
        zipfile.BadZipFile: the response body is not a valid zip archive.
    """
    root_path = Path('sets')
    set_path = root_path.joinpath(f'{set_code}.json')

    if set_path.exists():
        click.echo(f'The set file already exists at: {set_path.absolute()}')
        return

    # The whole body is read into memory for ZipFile anyway, so the request
    # is made without streaming.
    response = requests.get(f'https://www.mtgjson.com/json/{set_code}.json.zip')
    # Surface an unknown set code / server error as an HTTP error rather than
    # as a confusing BadZipFile raised while parsing an error page.
    response.raise_for_status()

    # Context manager guarantees the archive is closed after extraction.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archive.extractall(str(root_path))
class Card:
    """A card with its name, rarity, printings and EDHREC slug.

    Identity is the card name alone: equality and hashing both use ``name``,
    so two ``Card`` objects with the same name collapse to one entry in a set.
    """

    def __init__(self, name: str, rarity: str, printings: List[str]):
        self.name = name
        self.rarity = rarity
        self.printings = printings
        # Derived once from the name via edhrec_slugify().
        self.edhrec_slug = edhrec_slugify(self.name)

    def __repr__(self):
        return self.name

    def __str__(self):
        return self.name

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        # isinstance (not an exact type check) keeps subclasses comparable
        # and consistent with the name-based __hash__.
        if isinstance(other, Card):
            return self.name == other.name
        # Let Python try the reflected comparison; `card == "text"` still
        # evaluates to False through the default fallback.
        return NotImplemented
def iterate_cards_in_set(set_code: str) -> Iterator[Card]:
    """Yield a ``Card`` for every entry under ``cards`` in ``sets/<set_code>.json``.

    Name selection per entry:
      * layout ``adventure`` or ``flip`` with a ``names`` list: the first name;
      * any other layout with a ``names`` list: all names joined by ``' // '``;
      * no (or empty) ``names``: the entry's own ``name``.

    Rarity defaults to ``'unknown'`` and is title-cased; printings default to
    an empty list.

    Raises:
        FileNotFoundError: the set file does not exist.
        KeyError: an entry has neither ``names`` nor ``name``.
    """
    # Read and close the file up front instead of leaking the handle that was
    # previously passed straight to json.load(); encoding is explicit so the
    # platform default is never used to decode the JSON.
    with Path(f'sets/{set_code}.json').open('r', encoding='utf-8') as handler:
        set_content = json.load(handler)

    for card in set_content.get('cards', []):
        names = card.get('names') or []
        if names and card.get('layout') in ('adventure', 'flip'):
            name = names[0]
        elif names:
            name = ' // '.join(names)
        else:
            # Also reached for adventure/flip entries without `names`, which
            # used to crash on indexing a missing/empty list.
            name = card['name']

        yield Card(name, card.get('rarity', 'unknown').title(), card.get('printings', []))
class MtgSet:
    """Holder for the set of ``Card`` objects that make up one set."""

    def __init__(self, cards: Set[Card]):
        # Stored as given; the caller's set object is referenced, not copied.
        self.cards = cards
def parse_set(set_code: str) -> MtgSet:
    """Collect the cards yielded for ``set_code`` into a set and wrap them in an ``MtgSet``."""
    unique_cards = set(iterate_cards_in_set(set_code))
    return MtgSet(unique_cards)
def edhrec_slugify(card_name: str) -> str:
    """Return the slug for ``card_name``: apostrophes are removed, then ``slugify`` is applied."""
    # Deleting the apostrophe character is equivalent to replacing it with ''.
    without_apostrophes = card_name.translate({ord("'"): None})
    return slugify(without_apostrophes)
def edhrec_fetch_card_data(slug: str):
    """Download the EDHREC JSON for ``slug`` to ``edhrec/<slug>.json`` unless cached.

    Nothing is requested when the file already exists.

    Raises:
        FileNotFoundError: EDHREC answered with a status other than 200.
    """
    root_path = Path('edhrec')
    file_path = root_path.joinpath(f'{slug}.json')

    if file_path.exists():
        return

    response = requests.get(f'https://edhrec-json.s3.amazonaws.com/en/cards/{slug}.json')
    if response.status_code != 200:
        # The original message ended after the colon; name the card and the
        # status so the failure is actionable.
        raise FileNotFoundError(
            f'Card data not found on EDHREC: {slug} (HTTP {response.status_code})'
        )

    # Without the cache directory the write below raises FileNotFoundError,
    # which callers would mistake for "card not found on EDHREC".
    root_path.mkdir(parents=True, exist_ok=True)
    with file_path.open('wb') as handler:
        handler.write(response.content)
class EdhrecData:
    """Deck-usage figures, color identity and vendor prices for one card.

    ``prices`` maps a vendor key (``'cardmarket'``, ``'tcgplayer'``) to a dict
    from which the ``price`` and ``url`` entries are read.
    """

    def __init__(
        self,
        decks: int,
        percentage: int,
        all_decks: int,
        color_identity: List[str],
        prices: Dict[str, Dict],
    ):
        self.decks = decks
        self.percentage = percentage
        self.all_decks = all_decks
        self.color_identity = color_identity
        self.prices = prices

    @staticmethod
    def unused() -> 'EdhrecData':
        """Return an instance with zero counts, no colors and no prices."""
        return EdhrecData(decks=0, percentage=0, all_decks=0, color_identity=[], prices={})

    def _vendor_field(self, vendor, field, fallback):
        """Return ``prices[vendor][field]``; ``fallback`` if the vendor entry is missing/empty or lacks the field."""
        entry = self.prices.get(vendor, {})
        if not entry:
            return fallback
        return entry.get(field, fallback)

    @property
    def real_color_identity(self):
        """All color identity entries concatenated into one string."""
        return ''.join(self.color_identity)

    @property
    def simplified_color_identity(self):
        """``'colorless'``, the single color, or ``'multicolor'``."""
        colors = self.color_identity
        if not colors:
            return 'colorless'
        if len(colors) == 1:
            return colors[0]
        return 'multicolor'

    @property
    def cardmarket_price_trend(self):
        """Cardmarket ``price`` entry, or 0 when unavailable."""
        return self._vendor_field('cardmarket', 'price', 0)

    @property
    def cardmarket_url(self):
        """Cardmarket ``url`` entry, or ``'-'`` when unavailable."""
        return self._vendor_field('cardmarket', 'url', '-')

    @property
    def tcgplayer_price_trend(self):
        """TCGplayer ``price`` entry, or 0 when unavailable."""
        return self._vendor_field('tcgplayer', 'price', 0)

    @property
    def tcgplayer_url(self):
        """TCGplayer ``url`` entry, or ``'-'`` when unavailable."""
        return self._vendor_field('tcgplayer', 'url', '-')
def parse_edhrec_data(slug: str) -> EdhrecData:
    """Build an ``EdhrecData`` from the cached file ``edhrec/<slug>.json``.

    The deck counts and percentage are parsed out of the ``label`` text of
    ``panels.card``; ``color_identity`` and ``prices`` are taken from the same
    object. When the label is absent or does not match the expected text,
    ``EdhrecData.unused()`` is returned.

    Raises:
        FileNotFoundError: the cached file does not exist.
    """
    file_path = Path('edhrec').joinpath(f'{slug}.json')
    with file_path.open('r', encoding='utf-8') as handler:
        data = json.load(handler)

    # Tolerate missing/null levels instead of raising KeyError, which the
    # caller does not handle.
    card_panel = (data.get('panels') or {}).get('card') or {}
    label = card_panel.get('label') or ''

    # Check the match explicitly rather than catching the AttributeError of
    # calling .groups() on None, which could also hide unrelated errors.
    match = re.search(r'In ([0-9]+) decks\n([0-9]+)% of ([0-9]+) decks', label)
    if match is None:
        return EdhrecData.unused()

    decks, percentage, all_decks = match.groups()
    return EdhrecData(
        int(decks),
        int(percentage),
        int(all_decks),
        card_panel.get('color_identity', []),
        card_panel.get('prices', {}),
    )
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
beautifulsoup4==4.8.2 | |
certifi==2019.11.28 | |
chardet==3.0.4 | |
Click==7.0 | |
idna==2.9 | |
python-slugify==4.0.1 | |
requests==2.23.0 | |
soupsieve==2.0 | |
text-unidecode==1.3 | |
urllib3==1.25.8 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment