Skip to content

Instantly share code, notes, and snippets.

@lepe92
Created February 17, 2022 22:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lepe92/c59c0d8d0d7baadd1351a0090cf3a503 to your computer and use it in GitHub Desktop.
Save lepe92/c59c0d8d0d7baadd1351a0090cf3a503 to your computer and use it in GitHub Desktop.
import pickle

# importing matplotlib.pyplot
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd

data = pd.read_csv('/Users/ejimenez/Downloads/rex_dependencies.csv') #this csv is just the export of rex.dependencies table
# Columns of the rex.dependencies export:
#   ['job_filepath', 'relationship', 'related_table', 'table_filepath',
#    'date_macro', 'run_date', 'schedule_interval']
# Only job_filepath, relationship and related_table are used below.
GRAPH_PICKLE_PATH = "/Users/ejimenez/Downloads/test.gpickle"


def collect_dependencies(frame):
    """Derive graph nodes and edges from dependency rows.

    Each row says that the job at ``job_filepath`` either ``provides`` or
    ``requires`` ``related_table``. A table required by a job is a parent of
    every table that the same job provides.

    Args:
        frame: Column-indexable container (a pandas DataFrame or a dict of
            equally long sequences) with the keys ``'job_filepath'``,
            ``'relationship'`` and ``'related_table'``.

    Returns:
        A ``(nodes, edges)`` tuple. ``nodes`` maps each table, in order of
        first appearance, to the job file that provides it; the value is
        ``None`` for tables that are only ever required, and the last
        ``provides`` row wins when several jobs provide the same table.
        ``edges`` is a de-duplicated list of ``(required_table,
        provided_table)`` pairs.
    """
    rollup_file_by_table = {}
    tables_by_job = {}
    requirements = []

    rows = zip(frame['job_filepath'], frame['relationship'], frame['related_table'])
    for job_filepath, relationship, table in rows:
        if relationship == 'provides':
            rollup_file_by_table[table] = job_filepath
            tables_by_job.setdefault(job_filepath, []).append(table)
        elif relationship == 'requires':
            # Do NOT record the requiring job as this table's rollup_file:
            # that would make the table look like an output of the job and
            # create self-loops and edges between sibling requirements.
            rollup_file_by_table.setdefault(table, None)
            requirements.append((table, job_filepath))
        else:
            # Unknown relationship kinds are only reported, never graphed.
            print(relationship)

    # Edges are resolved after all rows were read so that the result does not
    # depend on whether a job's 'provides' row precedes its 'requires' rows.
    edges = dict.fromkeys(
        (required, provided)
        for required, job_filepath in requirements
        for provided in tables_by_job.get(job_filepath, ())
    )
    return rollup_file_by_table, list(edges)


def build_dependency_graph(frame):
    """Build the table-level dependency graph for *frame*.

    Args:
        frame: Same input as :func:`collect_dependencies`.

    Returns:
        An ``nx.DiGraph`` whose nodes are table names carrying a
        ``rollup_file`` attribute and whose edges point from a required
        table to each table produced by the job that requires it.
    """
    nodes, edges = collect_dependencies(frame)
    graph = nx.DiGraph()
    for table, rollup_file in nodes.items():
        graph.add_node(table, rollup_file=rollup_file)
    graph.add_edges_from(edges)
    return graph


def print_dependencies(graph):
    """Print every node of *graph* followed by its parents and children."""
    for node in graph:
        print("NODE -> ", node)
        for required in graph.predecessors(node):  # required tables
            print(f"PARENT NODE -> {required}")
        for provided in graph.successors(node):  # provided tables
            print(f"CHILD NODE -> {provided}")


if __name__ == "__main__":
    rollup_dependencies = build_dependency_graph(data)
    print(nx.number_of_nodes(rollup_dependencies))
    print(nx.number_of_edges(rollup_dependencies))
    # nx.write_gpickle was deprecated in NetworkX 2.6 and removed in 3.0;
    # it was a thin wrapper around pickle.dump, so the file format is the same.
    with open(GRAPH_PICKLE_PATH, "wb") as pickle_file:
        pickle.dump(rollup_dependencies, pickle_file, pickle.HIGHEST_PROTOCOL)
    print_dependencies(rollup_dependencies)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment