Last active
February 23, 2021 17:38
-
-
Save maolagin/3f15bd06ddce55713c16d738c5008219 to your computer and use it in GitHub Desktop.
EDDB factions Python example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First download the daily data dump files from EDDB, e.g. with the following commands: | |
# curl --compressed -o systems_populated.json 'https://eddb.io/archive/v6/systems_populated.json' | |
# curl --compressed -o factions.csv 'https://eddb.io/archive/v6/factions.csv' | |
import pandas as pd | |
import numpy as np | |
import json | |
import csv | |
# Note string dtypes select Pandas nullable types | |
fac_col_types = dict(government_id='Int64', allegiance_id='Int64', home_system_id='Int64', is_player_faction='bool') | |
factions = pd.read_csv('factions.csv', dtype=fac_col_types, index_col='id') | |
popsys = pd.read_json('systems_populated.json', orient='records') | |
popsys = popsys.set_index('id') | |
# Add distance from origin so we can select just Bubble systems | |
popsys['d_origin'] = (popsys.x**2 + popsys.y**2 + popsys.z**2).apply(np.sqrt) | |
# Faction presences is an interesting case. Each entry in popsys includes a JSON data structure containing a variable | |
# number of mappings recording factions (by ID number) present in the system. Pandas loads these as a Python object | |
# (specifically a list of dicts). Here we pull them all out into a separate table available for joins. | |
presences = [] | |
for id in popsys.index: | |
for p in popsys.loc[id]['minor_faction_presences']: | |
presences.append({**p, 'system_id': id, | |
'controlling': p['minor_faction_id'] == popsys.loc[id]['controlling_minor_faction_id']}) | |
presences_df = pd.DataFrame(presences) | |
# An example of a reasonably complex query that requires joining all the tables and performing a grouping transform | |
# The goal of this query is to identify systems that would be candidates for placing a Player Minor Faction | |
# Note that we make heavy use of Pandas' method chaining here - this is not especially idiomatic Python style | |
presences_df[['system_id','minor_faction_id','influence','controlling']]\ | |
.join(factions['is_player_faction'], on='minor_faction_id')\ | |
.groupby('system_id')\ | |
.agg({ | |
'influence': 'max', | |
'is_player_faction': 'any', | |
'minor_faction_id': 'count' | |
})\ | |
.join(popsys[['name','needs_permit','population','controlling_minor_faction','d_origin']])\ | |
.rename({'minor_faction_id': 'minor_faction_count', 'is_player_faction': 'has_player_faction'}, axis=1)\ | |
.query("has_player_faction == False and minor_faction_count < 7 and needs_permit == False and d_origin < 500")\ | |
.loc[:,['name','population','controlling_minor_faction','influence','minor_faction_count','d_origin']]\ | |
.sort_values('d_origin').to_csv('pmf_candidates.csv') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment