Skip to content

Instantly share code, notes, and snippets.

@aucchen
Last active May 24, 2022 09:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aucchen/f5e05817e71d5161a1d4eb0adce13877 to your computer and use it in GitHub Desktop.
Save aucchen/f5e05817e71d5161a1d4eb0adce13877 to your computer and use it in GitHub Desktop.
# 1. read data from an itch.io url
# 2. create an ifiction xml
import datetime
import json
import subprocess
from selectolax.parser import HTMLParser
import lxml.etree as etree
import requests
# Presumably intended to hold IFDB genre names (TODO confirm); it is empty and
# nothing in the visible code reads or fills it.
IFDB_Genres = []
class ItchData():
    """Metadata describing a single itch.io game.

    Attributes:
        url: URL of the game.
        title: Game title.
        author: Author name.
        release: Release date as a ``datetime.datetime``, or ``None``.
        desc: Description text; may be ``None``.
        platform: Authoring tool / format, or ``None`` when unknown.
        cover: Cover image reference (local file name or URL, depending on
            which function built the object).
        game_id: itch.io game id.
        params: Any extra keyword arguments given to the constructor.
    """

    # Field names a user may pass to update_field() from the interactive prompt.
    fields = {'title', 'author', 'date', 'description', 'platform'}

    def __init__(self, url, title, author, release, desc, platform, cover,
                 game_id,
                 **params):
        self.url = url
        self.title = title
        self.author = author
        self.release = release
        self.desc = desc
        self.platform = platform
        self.cover = cover
        self.game_id = game_id
        self.params = params

    def __repr__(self):
        """Return a multi-line summary showing only the first description line."""
        # desc may be None; treat it as empty rather than raising
        # AttributeError while merely printing the object.
        first_line = (self.desc or '').strip().split('\n')[0]
        template = (
            '\n'
            'title: "{0}"\n'
            'author: "{1}"\n'
            'date: "{2}"\n'
            'description: "{3}"\n'
            'platform: "{4}" '
        )
        return template.format(self.title, self.author, self.release,
                               first_line + '...', self.platform)

    def update_field(self, field, val):
        """Overwrite one field with a user-supplied string value.

        Args:
            field: 'title', 'author', 'description', 'platform', or one of
                the date aliases 'date', 'release', 'release date'.
            val: The new value. Dates must be formatted as YYYY-MM-DD.

        Raises:
            ValueError: If a date value does not match ``%Y-%m-%d``.

        Unrecognised field names are silently ignored.
        """
        if field == 'title':
            self.title = val
        elif field == 'author':
            self.author = val
        elif field in ('release date', 'release', 'date'):
            self.release = datetime.datetime.strptime(val, '%Y-%m-%d')
        elif field == 'description':
            self.desc = val
        elif field == 'platform':
            self.platform = val
def get_itch_data(url, use_short_desc=False, use_api=False, api_token=None):
    """
    Scrape an itch.io game page and return its metadata as an ItchData object.

    Takes a URL of the form https://red-autumn.itch.io/pageant. As a side
    effect, the cover image is downloaded into the current working directory
    and resized in place by running the external `convert` command.

    Args:
        url: URL of the itch.io game page.
        use_short_desc: Currently unused.
        use_api: Currently unused.
        api_token: Currently unused.

    Returns:
        An ItchData whose `cover` is the local file name of the downloaded
        image and whose `release` / `platform` are None when the page does
        not list them.
    """
    r = requests.get(url)
    tree = HTMLParser(r.content.decode('utf-8'))

    # 1. get game ID
    # <meta content="games/[id]" name="itch:path">
    game_id = tree.head.css_first('meta[name="itch:path"]').attributes['content']
    game_id = game_id.split('/')[1]
    # there's a json at https://api.itch.io/games/[id], but maybe we should
    # just parse all the info from the html to avoid making another request.
    # also, using the api endpoint requires an api token

    # get title and author
    title_author = tree.head.css_first('title').text()
    # Split on the LAST ' by ' so a title that itself contains ' by ' does not
    # break the unpacking; fall back to an empty author if there is none.
    title, separator, author = title_author.rpartition(' by ')
    if not separator:
        title, author = title_author, ''

    # get full description (empty when the page has no description block)
    desc_node = tree.css_first('div.formatted_description')
    desc = desc_node.text() if desc_node is not None else ''

    # get release date, platform from the info table rows
    release_date = None
    platform = None
    for row in tree.css('tr'):
        entries = list(row.iter(False))
        if len(entries) < 2:
            # not a label/value row
            continue
        label = entries[0].text()
        if label == 'Made with':
            platform = entries[1].text()
        if label in ('Release date', 'Published'):
            abbr = entries[1].css_first('abbr')
            if abbr is not None:
                date = abbr.attributes['title']
                release_date = datetime.datetime.strptime(date, '%d %B %Y @ %H:%M')

    # get cover image
    cover_url = tree.head.css_first('meta[property="og:image"]').attributes['content']
    # download cover
    cover_request = requests.get(cover_url)
    image_name = cover_url.split('/')[-1]
    with open(image_name, 'wb') as f:
        f.write(cover_request.content)

    # make the cover smaller
    # The file name comes from a remote URL, so pass an argument list rather
    # than a shell string: nothing in it is interpreted by a shell.
    subprocess.call(['convert', image_name, '-resize', '400x300', image_name])
    if image_name.endswith('.gif'):
        # '[0]' selects the first frame of the GIF
        subprocess.call(['convert', image_name + '[0]', image_name])

    data = ItchData(url, title, author, release_date, desc, platform, image_name, game_id)
    return data
def get_itch_json(game_id, api_token):
    """
    Fetch a game's JSON record from api.itch.io and wrap it in an ItchData.

    Args:
        game_id: Game id, as a string (it is concatenated onto the URL).
        api_token: Value sent verbatim as the Authorization header.

    Returns:
        An ItchData built from the 'game' object of the response. `platform`
        is None because the response has no such field; `cover` is the
        cover URL rather than a local file.
    """
    response = requests.get(
        'https://api.itch.io/games/' + game_id,
        headers={'Authorization': api_token},
    )
    game = json.loads(response.content)['game']
    return ItchData(
        url=game['url'],
        title=game['title'],
        author=game['user']['display_name'],
        release=datetime.datetime.strptime(game['published_at'], '%Y-%m-%dT%H:%M:%S.%f'),
        desc=game['short_text'],
        platform=None,  # the field isn't here :(
        cover=game['cover_url'],
        game_id=game['id'],
    )
def create_xml(data):
    """
    Build an iFiction XML tree from an ItchData object.

    The serialized document is printed to stdout, and the root element of the
    tree is returned.
    """
    def add_text(parent, tag, text):
        # Append a child element named `tag` to `parent` and set its text.
        node = etree.SubElement(parent, tag)
        node.text = text
        return node

    root = etree.Element(
        'ifindex',
        nsmap={None: 'http://babel.ifarchive.org/protocol/iFiction/'},
    )
    story = etree.SubElement(root, 'story')

    # identification
    identification = etree.SubElement(story, 'identification')
    add_text(identification, 'format', data.platform)

    # bibliographic
    bibliographic = etree.SubElement(story, 'bibliographic')
    add_text(bibliographic, 'title', data.title)
    add_text(bibliographic, 'author', data.author)
    # Newlines become the literal text '<br/>' (not an element), so the
    # serializer escapes it; run_pipeline un-escapes it before uploading.
    add_text(bibliographic, 'description', data.desc.replace('\n', '<br/>'))
    if data.release:
        add_text(bibliographic, 'firstpublished', data.release.strftime('%Y-%m-%d'))

    # contacts
    contacts = etree.SubElement(story, 'contacts')
    add_text(contacts, 'url', data.url)

    serialized = etree.tostring(root, pretty_print=True, xml_declaration=True, encoding='UTF-8')
    print(serialized.decode('utf-8'))
    return root
def create_links(data):
    """
    Build the IFDB 'downloads' XML tree with one link pointing at data.url.

    Returns the root element; nothing is printed or written.
    """
    root = etree.Element('downloads', nsmap={None: 'http://ifdb.org/api/xmlns'})
    link = etree.SubElement(etree.SubElement(root, 'links'), 'link')

    # Child order is kept as url, title, desc, format.
    children = (
        ('url', data.url),
        ('title', 'itch.io'),
        ('desc', 'Play on itch.io'),
        ('format', 'html'),
    )
    for tag, text in children:
        etree.SubElement(link, tag).text = text
    return root
def run_pipeline(url=None, destination='http://ifdb.org/putific'):
    """
    Interactively scrape an itch.io page and upload the result to IFDB.

    Steps: fetch the page data, let the user correct individual fields,
    build the iFiction and links XML, ask for IFDB credentials, and POST
    everything (including the cover image file) to `destination`.

    Args:
        url: itch.io game URL; prompted for when empty or None.
        destination: Upload endpoint that receives the multipart POST.

    Returns:
        The `requests` response object from the upload.
    """
    if not url:
        url = input('Enter an itch.io URL: ')
    data = get_itch_data(url)
    print('Data from itch.io: ')
    print(str(data))

    is_correct = input('Is this correct? (Y/N) ').lower()
    while is_correct == 'n':
        field = input('Which field should be corrected? ').lower()
        while field not in data.fields:
            field = input('Error: invalid field. Which field should be corrected? ').lower()
        if field == 'date':
            val = input('Enter a date (as YYYY-mm-dd): ')
        else:
            val = input('Enter the correct value: ')
        try:
            data.update_field(field, val)
        except ValueError:
            # A mistyped date should not abort the whole session; ask again.
            print('Error: invalid date, expected YYYY-mm-dd.')
            continue
        print('Updated data:')
        print(str(data))
        is_correct = input('Is this correct? (Y/N) ').lower()

    print('\nCreating xml for ifdb upload...\n')
    xml_root = create_xml(data)
    links = create_links(data)
    # TODO: create the wrapped file

    # get ifdb username/password
    # NOTE(review): input() echoes the password to the terminal.
    ifdb_username = input('Enter your IFDB username: ')
    ifdb_password = input('Enter your IFDB password: ')
    upload_data = {'username': ifdb_username, 'password': ifdb_password}

    output_etree = etree.tostring(xml_root, pretty_print=True, xml_declaration=True, encoding='utf-8')
    # create_xml stores line breaks as the text '<br/>', which the serializer
    # escapes; restore them to real <br/> tags in the uploaded document.
    output_etree = output_etree.replace(b'&lt;br/&gt;', b'<br/>')

    # Keep the cover file open only for the duration of the upload so the
    # handle is always closed, even if the request raises.
    with open(data.cover, 'rb') as cover_file:
        params = {'username': ('', ifdb_username),
                  'password': ('', ifdb_password),
                  'ifiction': ('ifiction.xml', output_etree, 'text/xml'),
                  'links': ('links.xml', etree.tostring(links, encoding='utf-8'), 'text/xml'),
                  'coverart': (data.cover, cover_file),
                  'requireIFID': ('', 'no')}
        r = requests.post(destination, data=upload_data, files=params)

    print()
    print('Response: ', r.content)
    return r
# Script entry point: run the interactive pipeline only when executed
# directly, keeping the upload response in the module-level name `result`.
if __name__ == '__main__':
    result = run_pipeline()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment