# aucchen/itch_to_ifdb.py

## itch_to_ifdb.py
# 1. read data from an itch.io url
# 2. create an ifiction xml

import datetime
import json
import subprocess

from selectolax.parser import HTMLParser
import lxml.etree as etree
import requests

# Placeholder list for IFDB genre names; empty, and not referenced by the code shown in this file.
IFDB_Genres = []

class ItchData:
    """
    Container for the metadata collected about one itch.io game.

    Attributes:
        url: address of the game's itch.io page.
        title: game title.
        author: author name.
        release: release date as a datetime.datetime, or None when unknown.
        desc: description text (may be None or empty).
        platform: authoring system / format (may be None).
        cover: cover image reference as supplied by the caller
            (a local file name or a URL, depending on who built the object).
        game_id: itch.io game identifier.
        params: any extra keyword arguments, kept as a dict.
    """

    # Field names the interactive correction prompt accepts.
    fields = {'title', 'author', 'date', 'description', 'platform'}

    def __init__(self, url, title, author, release, desc, platform, cover,
            game_id,
            **params):
        self.url = url
        self.title = title
        self.author = author
        self.release = release
        self.desc = desc
        self.platform = platform
        self.cover = cover
        self.game_id = game_id
        self.params = params

    def __repr__(self):
        """Returns a short summary that shows only the first description line."""
        # desc can be None (no description available); treat it as empty
        # instead of raising AttributeError on .strip().
        first_line = (self.desc or '').strip().split('\n')[0]
        template = ('\ntitle: "{0}"\n'
                    'author: "{1}"\n'
                    'date: "{2}"\n'
                    'description: "{3}"\n'
                    'platform: "{4}" ')
        return template.format(self.title, self.author, self.release,
                               first_line + '...', self.platform)

    def update_field(self, field, val):
        """
        Overwrites one attribute with a user-supplied value.

        field: 'title', 'author', 'description', 'platform', or one of
            'release date' / 'release' / 'date'. Any other name is ignored.
        val: the new value; dates must be given as YYYY-mm-dd.

        Raises ValueError when a date does not match YYYY-mm-dd.
        """
        if field == 'title':
            self.title = val
        elif field == 'author':
            self.author = val
        elif field in ('release date', 'release', 'date'):
            self.release = datetime.datetime.strptime(val, '%Y-%m-%d')
        elif field == 'description':
            self.desc = val
        elif field == 'platform':
            self.platform = val


def _run_convert(args):
    """
    Runs ImageMagick's `convert` with the given argument list.

    The arguments are passed without a shell, so a file name taken from a
    remote page cannot inject shell commands.
    """
    try:
        subprocess.call(['convert'] + args)
    except OSError as err:
        # With shell=True a missing binary only produced a non-zero exit code
        # that was ignored; stay best-effort and keep the unmodified image.
        print('Warning: could not run convert: {0}'.format(err))


def get_itch_data(url, use_short_desc=False, use_api=False, api_token=None):
    """
    Takes in an url of the form https://red-autumn.itch.io/pageant, and returns
    an ItchData object.

    The page is fetched and scraped for the game id, title, author,
    description, release date and platform. The cover image is downloaded
    into the current working directory and shrunk with ImageMagick's
    `convert`; the ItchData's cover is the local file name.

    use_short_desc, use_api and api_token are accepted but currently unused.
    """
    r = requests.get(url)
    tree = HTMLParser(r.content.decode('utf-8'))
    # 1. get game ID
    # <meta content="games/[id]" name="itch:path">
    game_id = tree.head.css_first('meta[name="itch:path"]').attributes['content']
    game_id = game_id.split('/')[1]
    # there's a json at https://api.itch.io/games/[id], but maybe we should just parse all the info from the html to avoid making another request.
    # also, using the api endpoint requires an api token
    # get title and author
    title_author = tree.head.css_first('title').text()
    # Split on the LAST ' by ' so a title that itself contains ' by ' does not
    # break the unpacking; without any ' by ' the whole text is the title.
    title, sep, author = title_author.rpartition(' by ')
    if not sep:
        title, author = title_author, ''
    # get full description (empty when the page has no description block)
    desc_node = tree.css_first('div.formatted_description')
    desc = desc_node.text() if desc_node is not None else ''
    # get release date, platform
    release_date = None
    platform = None
    for row in tree.css('tr'):
        entries = list(row.iter(False))
        if len(entries) < 2:
            # not a label/value row
            continue
        label = entries[0].text()
        if label == 'Made with':
            platform = entries[1].text()
        elif label in ('Release date', 'Published'):
            abbr = entries[1].css_first('abbr')
            if abbr is not None:
                date = abbr.attributes['title']
                release_date = datetime.datetime.strptime(date, '%d %B %Y @ %H:%M')
    # get cover image
    cover_url = tree.head.css_first('meta[property="og:image"]').attributes['content']
    # download cover
    cover_request = requests.get(cover_url)
    image_name = cover_url.split('/')[-1]
    with open(image_name, 'wb') as f:
        f.write(cover_request.content)
    # make the cover smaller
    _run_convert([image_name, '-resize', '400x300', image_name])
    if image_name.endswith('.gif'):
        # '[0]' selects the first frame of the gif
        _run_convert([image_name + '[0]', image_name])
    return ItchData(url, title, author, release_date, desc, platform, image_name, game_id)


def get_itch_json(game_id, api_token):
    """
    Fetches https://api.itch.io/games/[game_id] using the given API token and
    returns an ItchData object built from the 'game' entry of the response.

    The platform is always None because the response has no such field, and
    the cover is the remote cover URL (nothing is downloaded here).
    """
    # str() so that integer ids are accepted as well as strings
    json_url = 'https://api.itch.io/games/' + str(game_id)
    r_json = requests.get(json_url, headers={'Authorization': api_token})
    data = json.loads(r_json.content)['game']
    # a missing or empty publication date becomes None instead of crashing strptime
    published = data.get('published_at')
    release = None
    if published:
        release = datetime.datetime.strptime(published, '%Y-%m-%dT%H:%M:%S.%f')
    return ItchData(
            data['url'],
            data['title'],
            data['user']['display_name'],
            release,
            # keep desc a string so later .strip()/.replace() calls work
            data.get('short_text') or '',
            None, # the field isn't here :(
            data['cover_url'],
            data['id']
            )


def create_xml(data):
    """
    Creates an ifiction XML using an ItchData object.

    Prints the serialized document and returns the root <ifindex> element.
    Newlines in the description are stored as the literal text '<br/>';
    serialization escapes that text, so a caller that wants real <br/> tags
    has to unescape it in the serialized output.
    """
    nsmap = {None: 'http://babel.ifarchive.org/protocol/iFiction/'}
    root = etree.Element('ifindex', nsmap=nsmap)
    story = etree.SubElement(root, 'story')
    # identification
    iden = etree.SubElement(story, 'identification')
    # named format_elem so the builtin format() is not shadowed
    format_elem = etree.SubElement(iden, 'format')
    format_elem.text = data.platform

    # bibliographic
    bibliographic = etree.SubElement(story, 'bibliographic')
    title = etree.SubElement(bibliographic, 'title')
    title.text = data.title
    author = etree.SubElement(bibliographic, 'author')
    author.text = data.author
    desc = etree.SubElement(bibliographic, 'description')
    # desc may be None; treat it as an empty description
    desc.text = (data.desc or '').replace('\n', '<br/>')
    if data.release:
        firstpublished = etree.SubElement(bibliographic, 'firstpublished')
        firstpublished.text = data.release.strftime('%Y-%m-%d')

    # contacts
    contacts = etree.SubElement(story, 'contacts')
    url = etree.SubElement(contacts, 'url')
    url.text = data.url

    xml = etree.tostring(root, pretty_print=True, xml_declaration=True, encoding='UTF-8')
    print(xml.decode('utf-8'))
    return root


def create_links(data):
    """
    Builds the IFDB <downloads> document containing a single link back to the
    game's itch.io page, and returns its root element.
    """
    root = etree.Element('downloads', nsmap={None: 'http://ifdb.org/api/xmlns'})
    link = etree.SubElement(etree.SubElement(root, 'links'), 'link')
    # child elements of <link>, appended in this order
    children = (
        ('url', data.url),
        ('title', 'itch.io'),
        ('desc', 'Play on itch.io'),
        ('format', 'html'),
    )
    for tag, text in children:
        etree.SubElement(link, tag).text = text
    return root


def run_pipeline(url=None, destination='http://ifdb.org/putific'):
    """
    Interactive end-to-end run: scrapes an itch.io page, lets the user correct
    the scraped fields, then uploads the iFiction record, the links document
    and the cover art to `destination`.

    url: itch.io page address; prompted for when empty.
    destination: upload endpoint.

    Returns the requests response of the upload.
    """
    # local import: only this function needs it
    import getpass

    if not url:
        url = input('Enter an itch.io URL: ')
    data = get_itch_data(url)
    print('Data from itch.io: ')
    print(str(data))
    is_correct = input('Is this correct? (Y/N) ').strip().lower()
    while is_correct == 'n':
        field = input('Which field should be corrected? ').strip().lower()
        while field not in data.fields:
            field = input('Error: invalid field. Which field should be corrected? ').strip().lower()
        if field == 'date':
            val = input('Enter a date (as YYYY-mm-dd): ')
        else:
            val = input('Enter the correct value: ')
        try:
            data.update_field(field, val)
        except ValueError:
            # update_field parses dates with strptime; a malformed date
            # should re-prompt instead of aborting the whole run.
            print('Error: invalid date.')
            continue
        print('Updated data:')
        print(str(data))
        is_correct = input('Is this correct? (Y/N) ').strip().lower()
    print('\nCreating xml for ifdb upload...\n')
    xml_root = create_xml(data)
    links = create_links(data)
    # TODO: create the wrapped file
    # get ifdb username/password
    ifdb_username = input('Enter your IFDB username: ')
    # getpass keeps the password from being echoed to the terminal
    ifdb_password = getpass.getpass('Enter your IFDB password: ')
    upload_data = {'username': ifdb_username, 'password': ifdb_password}
    output_etree = etree.tostring(xml_root, pretty_print=True, xml_declaration=True, encoding='utf-8')
    # create_xml stores line breaks as the text '<br/>', which serialization
    # escapes; turn them back into real tags.
    output_etree = output_etree.replace(b'&lt;br/&gt;', b'<br/>')
    links_xml = etree.tostring(links, encoding='utf-8')
    # NOTE(review): the default destination is plain http, so the credentials
    # are sent unencrypted unless the caller passes an https endpoint.
    # Open the cover inside a with-block so the handle is closed after upload.
    with open(data.cover, 'rb') as cover_file:
        params = {'username': ('', ifdb_username),
                  'password': ('', ifdb_password),
                  'ifiction': ('ifiction.xml', output_etree, 'text/xml'),
                  'links': ('links.xml', links_xml, 'text/xml'),
                  'coverart': (data.cover, cover_file),
                  'requireIFID': ('', 'no')}
        r = requests.post(destination, data=upload_data, files=params)
    print()
    print('Response: ', r.content)
    return r


# Run the interactive pipeline only when this file is executed as a script.
if __name__ == '__main__':
    result = run_pipeline()
	# 1. read data from an itch.io url
	# 2. create an ifiction xml

	import datetime
	import json
	import subprocess

	from selectolax.parser import HTMLParser
	import lxml.etree as etree
	import requests

	# Placeholder list for IFDB genre names; empty, and not referenced by the code shown in this file.
	IFDB_Genres = []

	class ItchData:
		"""
		Container for the metadata collected about one itch.io game.

		Attributes:
			url: address of the game's itch.io page.
			title: game title.
			author: author name.
			release: release date as a datetime.datetime, or None when unknown.
			desc: description text (may be None or empty).
			platform: authoring system / format (may be None).
			cover: cover image reference as supplied by the caller
				(a local file name or a URL, depending on who built the object).
			game_id: itch.io game identifier.
			params: any extra keyword arguments, kept as a dict.
		"""

		# Field names the interactive correction prompt accepts.
		fields = {'title', 'author', 'date', 'description', 'platform'}

		def __init__(self, url, title, author, release, desc, platform, cover,
				game_id,
				**params):
			self.url = url
			self.title = title
			self.author = author
			self.release = release
			self.desc = desc
			self.platform = platform
			self.cover = cover
			self.game_id = game_id
			self.params = params

		def __repr__(self):
			"""Returns a short summary that shows only the first description line."""
			# desc can be None (no description available); treat it as empty
			# instead of raising AttributeError on .strip().
			first_line = (self.desc or '').strip().split('\n')[0]
			# Built from escaped newlines so source indentation cannot leak
			# into the output text.
			template = ('\ntitle: "{0}"\n'
						'author: "{1}"\n'
						'date: "{2}"\n'
						'description: "{3}"\n'
						'platform: "{4}" ')
			return template.format(self.title, self.author, self.release,
					first_line + '...', self.platform)

		def update_field(self, field, val):
			"""
			Overwrites one attribute with a user-supplied value.

			field: 'title', 'author', 'description', 'platform', or one of
				'release date' / 'release' / 'date'. Any other name is ignored.
			val: the new value; dates must be given as YYYY-mm-dd.

			Raises ValueError when a date does not match YYYY-mm-dd.
			"""
			if field == 'title':
				self.title = val
			elif field == 'author':
				self.author = val
			elif field in ('release date', 'release', 'date'):
				self.release = datetime.datetime.strptime(val, '%Y-%m-%d')
			elif field == 'description':
				self.desc = val
			elif field == 'platform':
				self.platform = val



	def _run_convert(args):
		"""
		Runs ImageMagick's `convert` with the given argument list.

		The arguments are passed without a shell, so a file name taken from a
		remote page cannot inject shell commands.
		"""
		try:
			subprocess.call(['convert'] + args)
		except OSError as err:
			# With shell=True a missing binary only produced a non-zero exit
			# code that was ignored; stay best-effort and keep the image as is.
			print('Warning: could not run convert: {0}'.format(err))


	def get_itch_data(url, use_short_desc=False, use_api=False, api_token=None):
		"""
		Takes in an url of the form https://red-autumn.itch.io/pageant, and returns
		an ItchData object.

		The page is fetched and scraped for the game id, title, author,
		description, release date and platform. The cover image is downloaded
		into the current working directory and shrunk with ImageMagick's
		`convert`; the ItchData's cover is the local file name.

		use_short_desc, use_api and api_token are accepted but currently unused.
		"""
		r = requests.get(url)
		tree = HTMLParser(r.content.decode('utf-8'))
		# 1. get game ID
		# <meta content="games/[id]" name="itch:path">
		game_id = tree.head.css_first('meta[name="itch:path"]').attributes['content']
		game_id = game_id.split('/')[1]
		# there's a json at https://api.itch.io/games/[id], but maybe we should just parse all the info from the html to avoid making another request.
		# also, using the api endpoint requires an api token
		# get title and author
		title_author = tree.head.css_first('title').text()
		# Split on the LAST ' by ' so a title that itself contains ' by ' does
		# not break the unpacking; without any ' by ' the whole text is the title.
		title, sep, author = title_author.rpartition(' by ')
		if not sep:
			title, author = title_author, ''
		# get full description (empty when the page has no description block)
		desc_node = tree.css_first('div.formatted_description')
		desc = desc_node.text() if desc_node is not None else ''
		# get release date, platform
		release_date = None
		platform = None
		for row in tree.css('tr'):
			entries = list(row.iter(False))
			if len(entries) < 2:
				# not a label/value row
				continue
			label = entries[0].text()
			if label == 'Made with':
				platform = entries[1].text()
			elif label in ('Release date', 'Published'):
				abbr = entries[1].css_first('abbr')
				if abbr is not None:
					date = abbr.attributes['title']
					release_date = datetime.datetime.strptime(date, '%d %B %Y @ %H:%M')
		# get cover image
		cover_url = tree.head.css_first('meta[property="og:image"]').attributes['content']
		# download cover
		cover_request = requests.get(cover_url)
		image_name = cover_url.split('/')[-1]
		with open(image_name, 'wb') as f:
			f.write(cover_request.content)
		# make the cover smaller
		_run_convert([image_name, '-resize', '400x300', image_name])
		if image_name.endswith('.gif'):
			# '[0]' selects the first frame of the gif
			_run_convert([image_name + '[0]', image_name])
		return ItchData(url, title, author, release_date, desc, platform, image_name, game_id)


	def get_itch_json(game_id, api_token):
		"""
		Fetches https://api.itch.io/games/[game_id] using the given API token and
		returns an ItchData object built from the 'game' entry of the response.

		The platform is always None because the response has no such field, and
		the cover is the remote cover URL (nothing is downloaded here).
		"""
		# str() so that integer ids are accepted as well as strings
		json_url = 'https://api.itch.io/games/' + str(game_id)
		r_json = requests.get(json_url, headers={'Authorization': api_token})
		data = json.loads(r_json.content)['game']
		# a missing or empty publication date becomes None instead of crashing strptime
		published = data.get('published_at')
		release = None
		if published:
			release = datetime.datetime.strptime(published, '%Y-%m-%dT%H:%M:%S.%f')
		return ItchData(
				data['url'],
				data['title'],
				data['user']['display_name'],
				release,
				# keep desc a string so later .strip()/.replace() calls work
				data.get('short_text') or '',
				None, # the field isn't here :(
				data['cover_url'],
				data['id']
				)


	def create_xml(data):
		"""
		Creates an ifiction XML using an ItchData object.

		Prints the serialized document and returns the root <ifindex> element.
		Newlines in the description are stored as the literal text '<br/>';
		serialization escapes that text, so a caller that wants real <br/> tags
		has to unescape it in the serialized output.
		"""
		nsmap = {None: 'http://babel.ifarchive.org/protocol/iFiction/'}
		root = etree.Element('ifindex', nsmap=nsmap)
		story = etree.SubElement(root, 'story')
		# identification
		iden = etree.SubElement(story, 'identification')
		# named format_elem so the builtin format() is not shadowed
		format_elem = etree.SubElement(iden, 'format')
		format_elem.text = data.platform

		# bibliographic
		bibliographic = etree.SubElement(story, 'bibliographic')
		title = etree.SubElement(bibliographic, 'title')
		title.text = data.title
		author = etree.SubElement(bibliographic, 'author')
		author.text = data.author
		desc = etree.SubElement(bibliographic, 'description')
		# desc may be None; treat it as an empty description
		desc.text = (data.desc or '').replace('\n', '<br/>')
		if data.release:
			firstpublished = etree.SubElement(bibliographic, 'firstpublished')
			firstpublished.text = data.release.strftime('%Y-%m-%d')

		# contacts
		contacts = etree.SubElement(story, 'contacts')
		url = etree.SubElement(contacts, 'url')
		url.text = data.url

		xml = etree.tostring(root, pretty_print=True, xml_declaration=True, encoding='UTF-8')
		print(xml.decode('utf-8'))
		return root


	def create_links(data):
		"""
		Builds the IFDB <downloads> document containing a single link back to the
		game's itch.io page, and returns its root element.
		"""
		nsmap = {None: 'http://ifdb.org/api/xmlns'}
		root = etree.Element('downloads', nsmap=nsmap)
		links = etree.SubElement(root, 'links')
		link = etree.SubElement(links, 'link')
		link_url = etree.SubElement(link, 'url')
		link_url.text = data.url
		title = etree.SubElement(link, 'title')
		title.text = 'itch.io'
		desc = etree.SubElement(link, 'desc')
		desc.text = 'Play on itch.io'
		# named format_elem so the builtin format() is not shadowed
		format_elem = etree.SubElement(link, 'format')
		format_elem.text = 'html'
		return root


	def run_pipeline(url=None, destination='http://ifdb.org/putific'):
		"""
		Interactive end-to-end run: scrapes an itch.io page, lets the user correct
		the scraped fields, then uploads the iFiction record, the links document
		and the cover art to `destination`.

		url: itch.io page address; prompted for when empty.
		destination: upload endpoint.

		Returns the requests response of the upload.
		"""
		# local import: only this function needs it
		import getpass

		if not url:
			url = input('Enter an itch.io URL: ')
		data = get_itch_data(url)
		print('Data from itch.io: ')
		print(str(data))
		is_correct = input('Is this correct? (Y/N) ').strip().lower()
		while is_correct == 'n':
			field = input('Which field should be corrected? ').strip().lower()
			while field not in data.fields:
				field = input('Error: invalid field. Which field should be corrected? ').strip().lower()
			if field == 'date':
				val = input('Enter a date (as YYYY-mm-dd): ')
			else:
				val = input('Enter the correct value: ')
			try:
				data.update_field(field, val)
			except ValueError:
				# update_field parses dates with strptime; a malformed date
				# should re-prompt instead of aborting the whole run.
				print('Error: invalid date.')
				continue
			print('Updated data:')
			print(str(data))
			is_correct = input('Is this correct? (Y/N) ').strip().lower()
		print('\nCreating xml for ifdb upload...\n')
		xml_root = create_xml(data)
		links = create_links(data)
		# TODO: create the wrapped file
		# get ifdb username/password
		ifdb_username = input('Enter your IFDB username: ')
		# getpass keeps the password from being echoed to the terminal
		ifdb_password = getpass.getpass('Enter your IFDB password: ')
		upload_data = {'username': ifdb_username, 'password': ifdb_password}
		output_etree = etree.tostring(xml_root, pretty_print=True, xml_declaration=True, encoding='utf-8')
		# create_xml stores line breaks as the text '<br/>', which serialization
		# escapes; turn the escaped form back into real tags.
		output_etree = output_etree.replace(b'&lt;br/&gt;', b'<br/>')
		links_xml = etree.tostring(links, encoding='utf-8')
		# NOTE(review): the default destination is plain http, so the credentials
		# are sent unencrypted unless the caller passes an https endpoint.
		# Open the cover inside a with-block so the handle is closed after upload.
		with open(data.cover, 'rb') as cover_file:
			params = {'username': ('', ifdb_username),
					'password': ('', ifdb_password),
					'ifiction': ('ifiction.xml', output_etree, 'text/xml'),
					'links': ('links.xml', links_xml, 'text/xml'),
					'coverart': (data.cover, cover_file),
					'requireIFID': ('', 'no')}
			r = requests.post(destination, data=upload_data, files=params)
		print()
		print('Response: ', r.content)
		return r


	# Run the interactive pipeline only when this file is executed as a script.
	if __name__ == '__main__':
		result = run_pipeline()