Skip to content

Instantly share code, notes, and snippets.

@tjmadonna
Created October 25, 2024 18:24
Show Gist options
  • Save tjmadonna/48f7d00d84ee0b36094bf337e7cabc87 to your computer and use it in GitHub Desktop.
Save tjmadonna/48f7d00d84ee0b36094bf337e7cabc87 to your computer and use it in GitHub Desktop.
Update Dataset Metadata
import ast
from neo4j import GraphDatabase
# Connection settings: fill these in before running the script.
NEO4J_URI = ""
NEO4J_USERNAME = ""
NEO4J_PASSWORD = ""
AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

# Selects every Dataset node that still carries an ingest_metadata property.
DATASETS_QUERY = (
    "MATCH (n:Dataset) WHERE n.ingest_metadata IS NOT NULL "
    "RETURN n.uuid AS uuid, n.ingest_metadata AS ingest_metadata"
)


def build_dataset_update(uuid, raw_ingest_metadata):
    """Build the Cypher statement that splits one dataset's ingest_metadata.

    ``raw_ingest_metadata`` is the stored property value: the ``str()`` form
    of a Python dict. Its ``metadata`` and ``files`` entries are promoted to
    their own node properties (again stored as ``str()`` text), and
    ``ingest_metadata`` is rewritten to hold only ``dag_provenance_list``, or
    set to null when that key is absent.

    NOTE(review): any other keys inside ingest_metadata are dropped by this
    rewrite, exactly as in the original script -- confirm that is intended.

    Args:
        uuid: Value of the Dataset node's ``uuid`` property.
        raw_ingest_metadata: Stored ingest_metadata string to parse.

    Returns:
        A ``(query, parameters)`` tuple. ``parameters`` contains ``uuid``
        plus only the values actually referenced by ``query``.

    Raises:
        ValueError: If the value is not a valid Python literal or does not
            evaluate to a dict.
    """
    try:
        # literal_eval only evaluates literals, so stored text cannot run code.
        parsed = ast.literal_eval(raw_ingest_metadata)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        raise ValueError(f"ingest_metadata is not a valid literal: {err}") from err
    if not isinstance(parsed, dict):
        raise ValueError(
            f"ingest_metadata is a {type(parsed).__name__}, expected a dict"
        )

    assignments = []
    parameters = {"uuid": uuid}

    # Promote these keys to top-level properties only when they have a value.
    for key in ("metadata", "files"):
        value = parsed.get(key)
        if value is not None:
            parameters[key] = str(value)
            assignments.append(f"n.{key} = ${key}")

    dag_provenance_list = parsed.get("dag_provenance_list")
    if dag_provenance_list is not None:
        parameters["ingest_metadata"] = str(
            {"dag_provenance_list": dag_provenance_list}
        )
        assignments.append("n.ingest_metadata = $ingest_metadata")
    else:
        # Nothing left to keep: setting the property to null removes it.
        assignments.append("n.ingest_metadata = null")

    predicate = ", ".join(assignments)
    query = f"MATCH (n:Dataset {{uuid: $uuid}}) SET {predicate}"
    return query, parameters


def main():
    """Migrate every Dataset node that has ingest_metadata, one at a time.

    A dataset whose ingest_metadata cannot be parsed, or whose update fails,
    is reported and skipped so the remaining datasets are still processed.
    """
    changed_uuids = []
    failed_uuids = []

    with GraphDatabase.driver(NEO4J_URI, auth=AUTH) as driver:
        # Called before any query so connection problems surface up front.
        driver.verify_connectivity()

        # Query for all datasets with ingest_metadata
        records, _summary, _keys = driver.execute_query(DATASETS_QUERY)

        for dataset in records:
            uuid = dataset["uuid"]
            raw_ingest_metadata = dataset.get("ingest_metadata")
            if raw_ingest_metadata is None:
                continue

            try:
                query, parameters = build_dataset_update(uuid, raw_ingest_metadata)
            except ValueError as e:
                print(f"Skipping {uuid}: {e}")
                failed_uuids.append(uuid)
                continue

            try:
                driver.execute_query(query, parameters_=parameters)
            except Exception as e:
                # Best-effort migration: report the failure and keep going.
                print(f"Error updating metadata for {uuid}: {e}")
                failed_uuids.append(uuid)
            else:
                changed_uuids.append(uuid)

    print(f"Updated metadata for {len(changed_uuids)} entities")
    if failed_uuids:
        print(f"Failed to update {len(failed_uuids)} entities")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment