Skip to content

Instantly share code, notes, and snippets.

@adipasquale
Created February 15, 2019 08:37
Show Gist options
  • Save adipasquale/ab2a1c4035a4f79e84df12d3b8afbf74 to your computer and use it in GitHub Desktop.
Save adipasquale/ab2a1c4035a4f79e84df12d3b8afbf74 to your computer and use it in GitHub Desktop.
"""
Adds parent_conteneur_id and parent_texte_id to
tetiers, textes, sections and articles
pip3 install psycopg2-binary
"""
from argparse import ArgumentParser
import psycopg2
import psycopg2.extras
def deal_with_object(db_connection, table, doc_id):
sql = """
WITH RECURSIVE hierarchie(element, depth) AS (
SELECT sommaires.element, 0 AS depth, sommaires.parent
FROM sommaires
WHERE sommaires.element = '%s'
UNION ALL
SELECT DISTINCT sommaires.element, depth + 1 AS depth, sommaires.parent
FROM sommaires, hierarchie
WHERE sommaires.element = hierarchie.parent
)
SELECT hierarchie.parent, hierarchie.element
FROM hierarchie
""" % doc_id
cur = db_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
cur.execute(sql)
rows = cur.fetchall()
parent_texte_ids = [row["parent"] for row in rows if row["parent"][4:8] == "TEXT"]
if len(parent_texte_ids) == 0:
print("did not find any parent texte for %s" % (doc_id))
elif len(parent_texte_ids) > 1:
print(sql)
print("there were %s parent textes for %s" % (len(parent_texte_ids), doc_id))
parent_conteneur_ids = [row["parent"] for row in rows if row["parent"][4:8] == "CONT"]
if len(parent_conteneur_ids) > 1:
print(sql)
raise Exception("there were %s parent conteneurs for %s" % (len(parent_conteneur_ids), doc_id))
if __name__ == '__main__':
p = ArgumentParser()
p.add_argument(
'--base',
default="LEGI",
choices=["LEGI", "KALI"],
help="the database published by the DILA on which to execute"
)
p.add_argument('--dry-run', action='store_true', default=False)
args = p.parse_args()
db_connection = psycopg2.connect(
"dbname=%s user=user password=pass host=localhost port=5444" %
(args.base.lower())
)
for table in ["articles"]:
cur = db_connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
cur.execute("""
SELECT id FROM %s
WHERE parent_conteneur_id IS NULL
OR parent_texte_id IS NULL
""" % table)
c = 0
for doc in cur:
c +=1
deal_with_object(db_connection, table, doc["id"])
print("went through %s %ss" % (c, table))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment