Skip to content

Instantly share code, notes, and snippets.

@jexp
Last active January 31, 2022 10:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jexp/4d0e56ecd53f5f868eb6dfe9d322ec07 to your computer and use it in GitHub Desktop.
Save jexp/4d0e56ecd53f5f868eb6dfe9d322ec07 to your computer and use it in GitHub Desktop.
Simple GEDCOM to CSV converter
#!/usr/bin/python3
# usage: ./gedcom2csv.py file.get > file.csv
# https://pypi.org/project/python-gedcom/
import sys
from gedcom.element.individual import IndividualElement
from gedcom.parser import Parser
file_path = sys.argv[1]
gedcom_parser = Parser()
gedcom_parser.parse_file(file_path)
root_child_elements = gedcom_parser.get_root_child_elements()
print("first,last,birth,death,gender,id,mother,father")
root_child_elements = gedcom_parser.get_root_child_elements()
for e in root_child_elements:
if isinstance(e, IndividualElement):
(first,last) = e.get_name()
data = [first, last, str(e.get_birth_year()),str(e.get_death_year()),e.get_gender(),e.get_pointer()]
parents = gedcom_parser.get_parents(e)
data = data + [p.get_pointer() for p in parents if p.get_gender() == 'F']
data = data + [p.get_pointer() for p in parents if p.get_gender() == 'M']
print(",".join(data))
#!/usr/bin/python3
# usage: NEO4J_URI="bolt://localhost" NEO4J_PASSWORD=secret ./gedcom2neo4j.py file.ged
# https://pypi.org/project/python-gedcom/
# https://pypi.org/project/neo4j/
import sys
import os
from gedcom.element.individual import IndividualElement
from gedcom.parser import Parser
from neo4j import GraphDatabase
driver = GraphDatabase.driver(os.getenv('NEO4J_URI'), auth=("neo4j", os.getenv('NEO4J_PASSWORD')))
file_path = sys.argv[1]
gedcom_parser = Parser()
gedcom_parser.parse_file(file_path)
root_child_elements = gedcom_parser.get_root_child_elements()
statement = """
UNWIND $data as row
MERGE (p:Person {id:row.id})
SET p += row {.first, .last,.sex,
death: toInteger(row.death),birth:toInteger(row.birth)}
SET p.name = p.first + ' ' + p.last
CALL { WITH row, p
WITH * WHERE NOT coalesce(row.mother,'') = ''
MERGE (m:Person {id:row.mother})
MERGE (p)-[:MOTHER]->(m)
RETURN count(*) as mothers
}
CALL { WITH row, p
WITH * WHERE NOT coalesce(row.father,'') = ''
MERGE (f:Person {id:row.father})
MERGE (p)-[:FATHER]->(f)
RETURN count(*) as fathers
}
RETURN count(*) AS total
"""
data = []
root_child_elements = gedcom_parser.get_root_child_elements()
for e in root_child_elements:
if isinstance(e, IndividualElement):
(first,last) = e.get_name()
row = {"first":first, "last":last}
row["birth"]=e.get_birth_year()
row["death"]=e.get_death_year()
row["gender"]=e.get_gender()
row["id"]=e.get_pointer()
parents = gedcom_parser.get_parents(e)
row["father"]=next(iter([p.get_pointer() for p in parents if p.get_gender() == 'F']),None)
row["mother"]=next(iter([p.get_pointer() for p in parents if p.get_gender() == 'M']),None)
data= data + [row]
with driver.session() as session:
total = session.write_transaction(
lambda tx: tx.run(statement, data = data).single()['total'])
print("Entries added {total}".format(total=total))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment