Last active
May 24, 2022 09:22
-
-
Save aucchen/f5e05817e71d5161a1d4eb0adce13877 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1. Read data from an itch.io URL.
# 2. Create an iFiction XML record from that data.
import datetime | |
import json | |
import subprocess | |
from selectolax.parser import HTMLParser | |
import lxml.etree as etree | |
import requests | |
# List intended for IFDB genre names; it is empty here and nothing in the
# visible code reads it.
IFDB_Genres = []
class ItchData:
    """Metadata describing one itch.io game.

    Attributes (all stored exactly as passed to the constructor):
        url: address of the game's itch.io page.
        title: game title.
        author: author name.
        release: release date as a ``datetime.datetime``, or ``None``.
        desc: description text, or ``None`` when no description is known.
        platform: authoring tool / format name, or ``None``.
        cover: reference to the cover image, as supplied by the caller.
        game_id: itch.io game identifier.
        params: any additional keyword arguments.
    """

    # Field names that may be corrected through ``update_field``.
    fields = {'title', 'author', 'date', 'description', 'platform'}

    def __init__(self, url, title, author, release, desc, platform, cover,
                 game_id,
                 **params):
        self.url = url
        self.title = title
        self.author = author
        self.release = release
        self.desc = desc
        self.platform = platform
        self.cover = cover
        self.game_id = game_id
        self.params = params

    def __repr__(self):
        """Return a short multi-line summary; only the first description line is shown."""
        # ``desc`` may legitimately be None (e.g. no description available),
        # so fall back to an empty string instead of raising AttributeError.
        first_line = (self.desc or '').strip().split('\n')[0]
        formatting_string = """
title: "{0}"
author: "{1}"
date: "{2}"
description: "{3}"
platform: "{4}" """.format(self.title, self.author, self.release,
                           first_line + '...', self.platform)
        return formatting_string

    def update_field(self, field, val):
        """Overwrite one attribute selected by its user-facing field name.

        Args:
            field: 'title', 'author', 'description', 'platform', or one of
                'release date' / 'release' / 'date'.
            val: the new value; for the date fields a 'YYYY-MM-DD' string.

        Raises:
            ValueError: if a date value does not match 'YYYY-MM-DD'.

        Unknown field names are ignored.
        """
        if field == 'title':
            self.title = val
        elif field == 'author':
            self.author = val
        elif field in ('release date', 'release', 'date'):
            self.release = datetime.datetime.strptime(val, '%Y-%m-%d')
        elif field == 'description':
            self.desc = val
        elif field == 'platform':
            self.platform = val
def get_itch_data(url, use_short_desc=False, use_api=False, api_token=None):
    """
    Takes in an url of the form https://red-autumn.itch.io/pageant, and returns
    an ItchData object.

    The page HTML is fetched and parsed for the game id, title, author,
    description, release date and authoring tool. The cover image named by
    the page's ``og:image`` meta tag is downloaded into the current working
    directory and shrunk with the ImageMagick ``convert`` command.

    Args:
        url: address of the itch.io game page.
        use_short_desc, use_api, api_token: accepted for interface
            compatibility; this function does not read them.

    Returns:
        An ItchData whose ``cover`` is the local file name of the downloaded
        cover image.

    Raises:
        requests.HTTPError: if the page or the cover image request fails.
    """
    r = requests.get(url)
    r.raise_for_status()
    tree = HTMLParser(r.content.decode('utf-8'))
    # 1. get game ID
    # <meta content="games/[id]" name="itch:path">
    game_id = tree.head.css_first('meta[name="itch:path"]').attributes['content']
    game_id = game_id.split('/')[1]
    # there's a json at https://api.itch.io/games/[id], but maybe we should just parse all the info from the html to avoid making another request.
    # also, using the api endpoint requires an api token
    # get title and author
    title_author = tree.head.css_first('title').text()
    # Split on the LAST " by " so titles that themselves contain " by " do
    # not break the unpacking; with no separator the author is left empty.
    title, separator, author = title_author.rpartition(' by ')
    if not separator:
        title, author = title_author, ''
    # get full description
    desc_node = tree.css_first('div.formatted_description')
    desc = desc_node.text() if desc_node is not None else ''
    # get release date, platform
    release_date = None
    platform = None
    for row in tree.css('tr'):
        entries = list(row.iter(False))
        # Skip rows that do not have a label cell and a value cell.
        if len(entries) < 2:
            continue
        label = entries[0].text()
        if label == 'Made with':
            platform = entries[1].text()
        elif label in ('Release date', 'Published'):
            abbr = entries[1].css_first('abbr')
            if abbr is not None:
                date = abbr.attributes['title']
                release_date = datetime.datetime.strptime(date, '%d %B %Y @ %H:%M')
    # get cover image
    cover_url = tree.head.css_first('meta[property="og:image"]').attributes['content']
    # download cover
    cover_request = requests.get(cover_url)
    cover_request.raise_for_status()
    image_name = cover_url.split('/')[-1]
    with open(image_name, 'wb') as f:
        f.write(cover_request.content)
    # make the cover smaller
    # Arguments are passed as a list (no shell) because image_name comes from
    # a remote URL and must not be interpreted by a shell.
    subprocess.call(['convert', image_name, '-resize', '400x300', image_name])
    if image_name.endswith('.gif'):
        # "[0]" selects the first frame of the GIF.
        subprocess.call(['convert', image_name + '[0]', image_name])
    data = ItchData(url, title, author, release_date, desc, platform, image_name, game_id)
    return data
def _parse_itch_timestamp(stamp):
    """Parse a 'YYYY-MM-DDTHH:MM:SS[.fraction][Z]' string into a naive datetime."""
    if stamp.endswith('Z'):
        stamp = stamp[:-1]
    whole, dot, fraction = stamp.partition('.')
    if not dot:
        return datetime.datetime.strptime(whole, '%Y-%m-%dT%H:%M:%S')
    # %f accepts at most six digits, so longer fractions are truncated.
    return datetime.datetime.strptime(whole + '.' + fraction[:6],
                                      '%Y-%m-%dT%H:%M:%S.%f')


def get_itch_json(game_id, api_token):
    """
    Fetch a game's metadata from https://api.itch.io/games/[id] and return it
    as an ItchData object.

    Args:
        game_id: itch.io game id; a string or an integer.
        api_token: value sent verbatim in the ``Authorization`` header.

    Returns:
        An ItchData built from the ``game`` object of the JSON response. Its
        ``platform`` is None because the response has no such field, and its
        ``cover`` is the remote ``cover_url`` rather than a local file name.

    Raises:
        requests.HTTPError: if the API request fails.
        KeyError: if an expected key is missing from the response.
        ValueError: if ``published_at`` cannot be parsed.
    """
    # format() rather than "+" so an integer id does not raise TypeError.
    json_url = 'https://api.itch.io/games/{}'.format(game_id)
    r_json = requests.get(json_url, headers={'Authorization': api_token})
    r_json.raise_for_status()
    data = json.loads(r_json.content)
    data = data['game']
    return ItchData(
        data['url'],
        data['title'],
        data['user']['display_name'],
        _parse_itch_timestamp(data['published_at']),
        data['short_text'],
        None,  # the field isn't here :(
        data['cover_url'],
        data['id']
    )
def create_xml(data):
    """
    Creates an ifiction XML using an ItchData object.

    The tree has an ``ifindex`` root with one ``story`` holding
    ``identification`` (format), ``bibliographic`` (title, author,
    description and, when a release date is set, firstpublished) and
    ``contacts`` (url). The serialized document is printed to stdout.

    Args:
        data: ItchData to convert.

    Returns:
        The lxml root element of the generated tree.
    """
    nsmap = {None: 'http://babel.ifarchive.org/protocol/iFiction/'}
    root = etree.Element('ifindex', nsmap=nsmap)
    story = etree.SubElement(root, 'story')
    # identification
    iden = etree.SubElement(story, 'identification')
    # Named format_el so the builtin format() is not shadowed.
    format_el = etree.SubElement(iden, 'format')
    format_el.text = data.platform
    # bibliographic
    bibliographic = etree.SubElement(story, 'bibliographic')
    title = etree.SubElement(bibliographic, 'title')
    title.text = data.title
    author = etree.SubElement(bibliographic, 'author')
    author.text = data.author
    desc = etree.SubElement(bibliographic, 'description')
    # A missing description becomes empty text instead of raising
    # AttributeError. The '<br/>' markers are stored as element text, so
    # they are escaped when the tree is serialized.
    desc.text = (data.desc or '').replace('\n', '<br/>')
    if data.release:
        firstpublished = etree.SubElement(bibliographic, 'firstpublished')
        firstpublished.text = data.release.strftime('%Y-%m-%d')
    # contacts
    contacts = etree.SubElement(story, 'contacts')
    url = etree.SubElement(contacts, 'url')
    url.text = data.url
    xml = etree.tostring(root, pretty_print=True, xml_declaration=True, encoding='UTF-8')
    print(xml.decode('utf-8'))
    return root
def create_links(data):
    """
    Build the ``downloads`` XML tree holding a single link to the game's
    itch.io page, and return its root element.
    """
    downloads = etree.Element('downloads', nsmap={None: 'http://ifdb.org/api/xmlns'})
    link_node = etree.SubElement(etree.SubElement(downloads, 'links'), 'link')
    # Child elements of <link>, in document order.
    children = (
        ('url', data.url),
        ('title', 'itch.io'),
        ('desc', 'Play on itch.io'),
        ('format', 'html'),
    )
    for tag, text in children:
        etree.SubElement(link_node, tag).text = text
    return downloads
def run_pipeline(url=None, destination='http://ifdb.org/putific'):
    """
    Interactively scrape an itch.io page and upload the result to IFDB.

    Steps: fetch the page data, let the user correct individual fields,
    build the iFiction and links XML, ask for IFDB credentials, then POST
    everything (including the cover image) to ``destination``.

    Args:
        url: itch.io game page; prompted for when empty.
        destination: upload endpoint.
            NOTE(review): the default is plain http, so credentials are
            sent unencrypted - confirm whether https is supported.

    Returns:
        The ``requests`` response of the upload.
    """
    import getpass

    if not url:
        url = input('Enter an itch.io URL: ')
    data = get_itch_data(url)
    print('Data from itch.io: ')
    print(str(data))
    is_correct = input('Is this correct? (Y/N) ').lower()
    while is_correct == 'n':
        field = input('Which field should be corrected? ').lower()
        while field not in data.fields:
            field = input('Error: invalid field. Which field should be corrected? ').lower()
        if field == 'date':
            val = input('Enter a date (as YYYY-mm-dd): ')
        else:
            val = input('Enter the correct value: ')
        try:
            data.update_field(field, val)
        except ValueError:
            # A malformed date must not abort the session; ask again.
            print('Error: invalid date, expected YYYY-mm-dd.')
            continue
        print('Updated data:')
        print(str(data))
        is_correct = input('Is this correct? (Y/N) ').lower()
    print('\nCreating xml for ifdb upload...\n')
    xml_root = create_xml(data)
    links = create_links(data)
    # TODO: create the wrapped file
    # get ifdb username/password
    ifdb_username = input('Enter your IFDB username: ')
    # getpass keeps the password from being echoed to the terminal.
    ifdb_password = getpass.getpass('Enter your IFDB password: ')
    upload_url = destination
    upload_data = {'username': ifdb_username, 'password': ifdb_password}
    output_etree = etree.tostring(xml_root, pretty_print=True, xml_declaration=True, encoding='utf-8')
    # The description's '<br/>' markers are escaped during serialization;
    # turn them back into real line-break tags.
    output_etree = output_etree.replace(b'&lt;br/&gt;', b'<br/>')
    # Keep the cover file open only for the duration of the upload.
    with open(data.cover, 'rb') as cover_file:
        params = {'username': ('', ifdb_username),
                  'password': ('', ifdb_password),
                  'ifiction': ('ifiction.xml', output_etree, 'text/xml'),
                  'links': ('links.xml', etree.tostring(links, encoding='utf-8'), 'text/xml'),
                  'coverart': (data.cover, cover_file),
                  'requireIFID': ('', 'no')}
        r = requests.post(upload_url, data=upload_data, files=params)
    print()
    print('Response: ', r.content)
    return r
if __name__ == "__main__":
    # Script entry point: run the interactive pipeline and keep the response.
    result = run_pipeline()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment