Skip to content

Instantly share code, notes, and snippets.

@shounakacharya
Last active July 22, 2024 13:23
Show Gist options
  • Save shounakacharya/871231ac86daf663adf0a8c58a106856 to your computer and use it in GitHub Desktop.
Save shounakacharya/871231ac86daf663adf0a8c58a106856 to your computer and use it in GitHub Desktop.
<!DOCTYPE html>
<meta charset="utf-8">
<svg width="960" height="600"></svg>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script type="module">
import data from "./python-dependencies.json" with { type: "json" };
var nodes = data.nodes;
var links = data.links;
/**
 * Lists the ids of all nodes that share a link with `node`.
 *
 * Reads the module-level `links` array and expects every link's `source` and
 * `target` to be objects carrying an `id` property.
 *
 * @param {{id: *}} node - Node whose neighbourhood is wanted.
 * @returns {Array} `node.id` first, followed by the id found at the other end
 *   of every link that touches the node, in link order.
 */
function getNeighbors(node) {
  const neighborIds = [node.id];
  links.forEach(function (link) {
    if (link.target.id === node.id) {
      neighborIds.push(link.source.id);
    } else if (link.source.id === node.id) {
      neighborIds.push(link.target.id);
    }
  });
  return neighborIds;
}
/**
 * Tells whether `link` starts or ends at `node`.
 *
 * @param {{id: *}} node - Node to test against.
 * @param {{source: {id: *}, target: {id: *}}} link - Link whose endpoints are compared by id.
 * @returns {boolean} true when either endpoint id strictly equals `node.id`.
 */
function isNeighborLink(node, link) {
  if (link.target.id === node.id) {
    return true;
  }
  return link.source.id === node.id;
}
/**
 * Picks the fill colour for a node circle.
 *
 * `neighbors` is only honoured when it is an array; this function is also
 * passed directly to `.attr('fill', ...)`, where the second argument is not
 * an id list.
 *
 * @param {{id: *, level: number}} node - Node being drawn.
 * @param {Array} [neighbors] - Ids to highlight.
 * @returns {string} 'blue'/'green' for highlighted level-1/other nodes,
 *   'red'/'gray' for non-highlighted level-1/other nodes.
 */
function getNodeColor(node, neighbors) {
  const isHighlighted = Array.isArray(neighbors) && neighbors.indexOf(node.id) !== -1;
  const isTopLevel = node.level === 1;
  if (isHighlighted) {
    return isTopLevel ? 'blue' : 'green';
  }
  return isTopLevel ? 'red' : 'gray';
}
/**
 * Picks the stroke colour for a link relative to the selected node.
 *
 * @param {{id: *}} node - Currently selected node.
 * @param {{source: {id: *}, target: {id: *}}} link - Link being drawn.
 * @returns {string} 'green' when the link touches `node`, '#E5E5E5' otherwise.
 */
function getLinkColor(node, link) {
  if (isNeighborLink(node, link)) {
    return 'green';
  }
  return '#E5E5E5';
}
/**
 * Picks the fill colour for a node label.
 *
 * @param {{id: *}} node - Node whose label is being drawn.
 * @param {Array} [neighbors] - Ids to highlight; ignored unless it is an array.
 * @returns {string} 'green' when `node.id` is in `neighbors`, otherwise 'black'.
 */
function getTextColor(node, neighbors) {
  if (!Array.isArray(neighbors)) {
    return 'black';
  }
  return neighbors.indexOf(node.id) !== -1 ? 'green' : 'black';
}
// Size the drawing surface to the browser viewport.
const width = window.innerWidth;
const height = window.innerHeight;
const svg = d3.select('svg');
svg.attr('width', width).attr('height', height);

// simulation setup with all forces
const linkForce = d3
  .forceLink()
  // forceLink resolves link endpoints through this accessor, which is invoked
  // with each *node*; links reference nodes by their `id` field.
  .id(function (node) { return node.id; })
  // Per-link strength comes straight from the data file.
  .strength(function (link) { return link.strength; });

const simulation = d3
  .forceSimulation()
  .force('link', linkForce)
  .force('charge', d3.forceManyBody().strength(-120))
  .force('center', d3.forceCenter(width / 2, height / 2));

// Drag behaviour for node circles. While dragging, the node is pinned to the
// pointer through fx/fy and the simulation is re-heated so the rest of the
// graph follows.
const dragDrop = d3.drag()
  .on('start', function (node) {
    node.fx = node.x;
    node.fy = node.y;
  })
  .on('drag', function (node) {
    simulation.alphaTarget(0.7).restart();
    node.fx = d3.event.x;
    node.fy = d3.event.y;
  })
  .on('end', function () {
    // Without this reset the alpha target stays at 0.7 after the first drag
    // and the layout never settles. fx/fy are intentionally left untouched so
    // a dragged node stays where it was dropped.
    if (!d3.event.active) {
      simulation.alphaTarget(0);
    }
  });
/**
 * Click handler: recolours the graph so the clicked node, its direct
 * neighbours and the links between them stand out.
 *
 * Uses the module-level `nodeElements`, `textElements` and `linkElements`
 * selections.
 *
 * @param {{id: *}} selectedNode - Datum of the clicked node.
 */
function selectNode(selectedNode) {
  const highlightedIds = getNeighbors(selectedNode);

  const nodeFill = (node) => getNodeColor(node, highlightedIds);
  const labelFill = (node) => getTextColor(node, highlightedIds);
  const linkStroke = (link) => getLinkColor(selectedNode, link);

  // Restyle circles, then labels, then links.
  nodeElements.attr('fill', nodeFill);
  textElements.attr('fill', labelFill);
  linkElements.attr('stroke', linkStroke);
}
// The four layers below are appended to the SVG in this order: link lines,
// link labels, node circles, node labels.

// One <line> per link.
const linkLayer = svg.append('g').attr('class', 'links');
const linkElements = linkLayer
  .selectAll('line')
  .data(links)
  .enter()
  .append('line')
  .attr('stroke-width', 1)
  .attr('stroke', 'rgba(50, 50, 50, 0.2)');

// One <text> per link showing its `linkName`.
const linkLabelLayer = svg.append('g').attr('class', 'texts');
const linkText = linkLabelLayer
  .selectAll('text')
  .data(links)
  .enter()
  .append('text')
  .attr('font-family', 'Arial, Helvetica, sans-serif')
  .attr('fill', 'Black')
  .style('font', 'normal 12px Arial')
  .attr('text-anchor', 'middle')
  .text((link) => link.linkName);

// One draggable, clickable <circle> per node. getNodeColor is called without
// a neighbour list here, so it yields the non-highlighted colours.
const nodeLayer = svg.append('g').attr('class', 'nodes');
const nodeElements = nodeLayer
  .selectAll('circle')
  .data(nodes)
  .enter()
  .append('circle')
  .attr('r', 10)
  .attr('fill', getNodeColor)
  .call(dragDrop)
  .on('click', selectNode);

// One <text> per node showing its `label`, offset from the circle by dx/dy.
// This group shares the "texts" class with the link-label group.
const nodeLabelLayer = svg.append('g').attr('class', 'texts');
const textElements = nodeLabelLayer
  .selectAll('text')
  .data(nodes)
  .enter()
  .append('text')
  .text((node) => node.label)
  .attr('font-size', 15)
  .attr('dx', 15)
  .attr('dy', 4);
// Halfway point between two coordinates; used to place each link label.
const midpoint = (a, b) => ((a + b) / 2);

/**
 * Tick listener: copies the positions held on the node and link data onto the
 * corresponding SVG elements.
 */
function renderPositions() {
  nodeElements
    .attr('cx', (node) => node.x)
    .attr('cy', (node) => node.y);

  textElements
    .attr('x', (node) => node.x)
    .attr('y', (node) => node.y);

  linkElements
    .attr('x1', (link) => link.source.x)
    .attr('y1', (link) => link.source.y)
    .attr('x2', (link) => link.target.x)
    .attr('y2', (link) => link.target.y);

  linkText
    .attr('x', (link) => midpoint(link.source.x, link.target.x))
    .attr('y', (link) => midpoint(link.source.y, link.target.y));
}

// Register the nodes and the tick listener first, then hand the links to the
// "link" force.
simulation.nodes(nodes).on('tick', renderPositions);
simulation.force('link').links(links);
</script>
import os
import json
import getpass
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from pymongo import MongoClient
from pprint import pprint
# Chat model handed to the graph transformer. "YOUR-API-KEY" is a placeholder
# and has to be replaced with a real key before running.
llm = ChatOpenAI(temperature=0, model_name="gpt-4-turbo",api_key="YOUR-API-KEY")
# LangChain transformer that turns documents into graph documents using the model above.
llm_transformer = LLMGraphTransformer(llm=llm)
# Sample passage that is converted into a graph below.
text = """
Python was invented in the late 1980s by Guido van Rossum at Centrum Wiskunde & Informatica in the Netherlands as a successor to the ABC
programming language, which was inspired by SETL capable of exception handling and interfacing with the Amoeba operating system.
Its implementation began in December 1989. Python 2.0 was released on 16 October 2000, with many major new features such as list comprehensions,
cycle-detecting garbage collection, reference counting, and Unicode support. Python 3.0, released on 3 December 2008,
with many of its major features backported to Python 2.6.x and 2.7.x. Releases of Python 3 include the 2to3 utility,
which automates the translation of Python 2 code to Python 3.
"""
# A single Document is submitted, so only graph_documents[0] is used afterwards.
documents = [Document(page_content=text)]
graph_documents = llm_transformer.convert_to_graph_documents(documents)
# Show what was extracted from the passage.
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")
# Identifying and creating collections: one MongoDB collection per distinct node type.
nodes = graph_documents[0].nodes
relationships = graph_documents[0].relationships
collections = {node.type for node in nodes}
print(collections)
try:
    uri = "YOUR-MONGO-URL"
    # Using the client as a context manager closes it on exit and, unlike a
    # `finally: client.close()`, cannot fail with a NameError when
    # MongoClient(uri) itself raises.
    with MongoClient(uri) as client:
        database = client["generic_graph"]
        # create_collection raises for a name that already exists, which would
        # abort the loop on a re-run; skip the ones that are already there.
        existing_collections = set(database.list_collection_names())
        for collection in collections:
            if collection not in existing_collections:
                database.create_collection(collection)
except Exception as e:
    # Best effort: report the problem and let the script continue.
    print(e)
# Map every node id to the set of relationship types for which that node is
# the source; nodes without outgoing relationships map to an empty set.
node_relationship_types = {
    node.id: {rel.type for rel in relationships if rel.source.id == node.id}
    for node in nodes
}
print(node_relationship_types)
# Build one MongoDB document per node: its id and type, plus one list field per
# outgoing relationship type holding the ids of the related target nodes.
mongo_documents = []
for node in nodes:
    document_dict = {'id': node.id, 'type': node.type}
    # Start every relationship field as an empty list ...
    for relation_type in node_relationship_types[node.id]:
        document_dict[relation_type] = []
    # ... then fill it with the targets of the relationships leaving this node.
    outgoing = (rel for rel in relationships if rel.source.id == node.id)
    for rel in outgoing:
        document_dict[rel.type].append(rel.target.id)
    mongo_documents.append(document_dict)
print(mongo_documents)
# Inserting the built documents into respective collections: each document goes
# into the collection named after its 'type' field.
try:
    uri = "YOUR-MONGO-URL"
    # Using the client as a context manager closes it on exit and, unlike a
    # `finally: client.close()`, cannot fail with a NameError when
    # MongoClient(uri) itself raises.
    with MongoClient(uri) as client:
        database = client["generic_graph"]
        for mongo_document in mongo_documents:
            collection = database[mongo_document['type']]
            collection.insert_one(mongo_document)
except Exception as e:
    # Best effort: report the problem and let the script continue.
    print(e)
import os
import json
import getpass
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship
from pymongo import MongoClient
from pprint import pprint
# Chat model handed to the graph transformer. "YOUR-OPENAI-KEY" is a placeholder
# and has to be replaced with a real key before running.
llm = ChatOpenAI(temperature=0, model_name="gpt-4-turbo",api_key="YOUR-OPENAI-KEY")
# LangChain transformer that turns documents into graph documents using the model above.
llm_transformer = LLMGraphTransformer(llm=llm)
# Sample passage that is converted into a graph below.
text = """
Python was invented in the late 1980s by Guido van Rossum at Centrum Wiskunde & Informatica in the Netherlands as a successor to the ABC
programming language, which was inspired by SETL capable of exception handling and interfacing with the Amoeba operating system.
Its implementation began in December 1989. Python 2.0 was released on 16 October 2000, with many major new features such as list comprehensions,
cycle-detecting garbage collection, reference counting, and Unicode support. Python 3.0, released on 3 December 2008,
with many of its major features backported to Python 2.6.x and 2.7.x. Releases of Python 3 include the 2to3 utility,
which automates the translation of Python 2 code to Python 3.
"""
# A single Document is submitted, so only graph_documents[0] is used afterwards.
documents = [Document(page_content=text)]
graph_documents = llm_transformer.convert_to_graph_documents(documents)
# Show what was extracted from the passage.
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")
# Create collections based on node types: one MongoDB collection per distinct type.
nodes = graph_documents[0].nodes
relationships = graph_documents[0].relationships
collections = {node.type for node in nodes}
print(collections)
try:
    uri = "YOUR-MONGO-CLUSTER-URL"
    # Using the client as a context manager closes it on exit and, unlike a
    # `finally: client.close()`, cannot fail with a NameError when
    # MongoClient(uri) itself raises.
    with MongoClient(uri) as client:
        database = client["embedded_graph_2"]
        # create_collection raises for a name that already exists, which would
        # abort the loop on a re-run; skip the ones that are already there.
        existing_collections = set(database.list_collection_names())
        for collection in collections:
            if collection not in existing_collections:
                database.create_collection(collection)
except Exception as e:
    # Best effort: report the problem and let the script continue.
    print(e)
# Map every node id to the set of relationship types for which that node is
# the source; nodes without outgoing relationships map to an empty set.
node_relationship_types = {
    node.id: {rel.type for rel in relationships if rel.source.id == node.id}
    for node in nodes
}
print(node_relationship_types)
# Build one MongoDB document per node. Besides the id, the type and one list
# field per outgoing relationship type, every document embeds the D3-ready
# pieces of its own neighbourhood: the node itself as source ('d3_source_node'),
# its direct targets ('d3_target_nodes') and the edges to them ('d3_edges').
mongo_documents = []
for node in nodes:
    document_dict = {
        'id': node.id,
        'type': node.type,
        'd3_edges': [],
        'd3_target_nodes': [],
        'd3_source_node': {'id': node.id, 'group': 0, 'level': 1, 'label': node.id},
    }
    # Start every relationship field as an empty list ...
    for relation_type in node_relationship_types[node.id]:
        document_dict[relation_type] = []
    # ... then record each relationship leaving this node in all three places.
    outgoing = (rel for rel in relationships if rel.source.id == node.id)
    for rel in outgoing:
        target_id = rel.target.id
        document_dict[rel.type].append(target_id)
        document_dict['d3_target_nodes'].append(
            {'id': target_id, 'group': 1, 'level': 2, 'label': target_id}
        )
        document_dict['d3_edges'].append(
            {'source': node.id, 'target': target_id, 'strength': 0.7, 'linkName': rel.type}
        )
    mongo_documents.append(document_dict)
pprint(mongo_documents)
# Insert every built document into the collection named after its 'type' field.
try:
    uri = "YOUR-MONGO-CLUSTER-URL"
    # Using the client as a context manager closes it on exit and, unlike a
    # `finally: client.close()`, cannot fail with a NameError when
    # MongoClient(uri) itself raises.
    with MongoClient(uri) as client:
        database = client["embedded_graph_2"]
        for mongo_document in mongo_documents:
            collection = database[mongo_document['type']]
            collection.insert_one(mongo_document)
except Exception as e:
    # Best effort: report the problem and let the script continue.
    print(e)
import pymongo
# Query the "generic_graph" database with $graphLookup and flatten the result
# into the node/link lists the D3 page consumes.
#
# The lists are created before the try block so they exist (empty) even when
# connecting or querying fails.
nodes = []
links = []
_seen_node_ids = set()
_seen_links = set()


def _add_node(node_id, group, level):
    """Append a D3 node dict for node_id unless a node with that id was already added."""
    if node_id in _seen_node_ids:
        return
    _seen_node_ids.add(node_id)
    nodes.append({'id': node_id, 'group': group, 'level': level, 'label': node_id})


def _add_link(source_id, target_id, link_name):
    """Append a D3 link dict unless the same (source, target, name) link was already added.

    'linkName' is included because the D3 page renders it as the edge label.
    """
    link_key = (source_id, target_id, link_name)
    if link_key in _seen_links:
        return
    _seen_links.add(link_key)
    links.append({'source': source_id, 'target': target_id, 'strength': 0.7, 'linkName': link_name})


try:
    uri = "YOUR-MONGO-URL"
    # pymongo.MongoClient is reachable through the `import pymongo` above. The
    # context manager closes the client on exit and cannot fail with a
    # NameError when the constructor itself raises.
    with pymongo.MongoClient(uri) as client:
        database = client["generic_graph"]
        language_collection = database["Programming_language"]
        # NOTE(review): connectFromField and connectToField are both 'id', so a
        # matched document can only lead back to itself; confirm whether
        # recursion along 'SUCCESSOR_OF' was intended.
        language_pipeline = [
            {
                '$graphLookup': {
                    'from': 'Programming_language',
                    'startWith': '$SUCCESSOR_OF',
                    'connectFromField': 'id',
                    'connectToField': 'id',
                    'as': 'relations',
                    'maxDepth': 2
                }
            }, {
                # One output document per looked-up relation; documents without
                # any relation are dropped.
                '$unwind': {
                    'path': '$relations',
                    'preserveNullAndEmptyArrays': False
                }
            }
        ]
        lang_aggCursor = language_collection.aggregate(language_pipeline)
        for document in lang_aggCursor:
            print(document)
            source_id = document.get('id')
            # After $unwind the same source document shows up once per
            # relation; _add_node/_add_link keep the output free of duplicates.
            _add_node(source_id, group=0, level=1)
            for key in document.keys():
                print(key)
                if key in ('_id', 'id', 'type', 'SUCCESSOR_OF'):
                    continue
                if key == 'relations':
                    relation = document[key]
                    relation_id = relation['id']
                    _add_node(relation_id, group=1, level=2)
                    # Not every looked-up document has an INSPIRED_BY field;
                    # a missing one must not abort the whole export.
                    inspired_by = relation.get('INSPIRED_BY') or []
                    if inspired_by:
                        _add_node(inspired_by[0], group=1, level=2)
                        _add_link(relation_id, inspired_by[0], 'INSPIRED_BY')
                    # The lookup starts from this document's SUCCESSOR_OF values.
                    _add_link(source_id, relation_id, 'SUCCESSOR_OF')
                    continue
                # Every remaining field is treated as a relationship list named
                # after its type; only its first target is exported.
                targets = document[key]
                if not targets:
                    continue
                _add_node(targets[0], group=1, level=2)
                _add_link(source_id, targets[0], key)
        print(nodes)
        print(links)
except Exception as e:
    # Best effort: report the problem and continue with whatever was collected.
    print(e)
# Write the collected graph to python-dependencies.json, the file the D3 page
# imports, as a single object with "nodes" and "links" arrays.
import json

nodes_links = dict(nodes=nodes, links=links)
with open("python-dependencies.json", 'w') as output_file:
    json.dump(nodes_links, output_file, indent=1)
# Rebuild the D3 node/link lists from the documents of "embedded_graph_2",
# which already carry their D3 source node, target nodes and edges.
master_lookup_set = set()  # ids of the nodes already placed in `nodes`
nodes = []
links = []
try:
    uri = "YOUR-MONGO-CLUSTER-URL"
    # Using the client as a context manager closes it on exit and, unlike a
    # `finally: client.close()`, cannot fail with a NameError when
    # MongoClient(uri) itself raises.
    with MongoClient(uri) as client:
        database = client["embedded_graph_2"]
        collection = database["Programming_language"]
        cursor = collection.find({}, {'_id': 0, 'id': 1, 'd3_edges': 1, 'd3_target_nodes': 1, 'd3_source_node': 1})
        for document in cursor:
            print(document)
            # Add the document's own node unless it was already added, e.g. as
            # the target of an earlier document.
            if document['id'] not in master_lookup_set:
                master_lookup_set.add(document['id'])
                nodes.append(document['d3_source_node'])
            # Edges are always exported ...
            links.extend(document['d3_edges'])
            # ... and so is every target they point to, exactly once. Targets
            # must be handled for every document, otherwise a link can
            # reference a node that is missing from `nodes`.
            for target_node in document['d3_target_nodes']:
                if target_node['id'] not in master_lookup_set:
                    master_lookup_set.add(target_node['id'])
                    nodes.append(target_node)
except Exception as e:
    # Best effort: report the problem and continue with whatever was collected.
    print(e)
# Write the collected graph to python-dependencies_embedded_2.json as a single
# object with "nodes" and "links" arrays.
import json

nodes_links = dict(nodes=nodes, links=links)
with open("python-dependencies_embedded_2.json", 'w') as output_file:
    json.dump(nodes_links, output_file, indent=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment