C.J. Sullivan cj2001

## grapher.py
import json
import argparse
import yaml
from typing import Any, Dict

import networkx as nx
import pandas as pd
from tqdm import tqdm
import pygraphviz
import graphviz

## load_hero_edges.cypher
// Read heros_edge_list.csv row by row and connect the hero named in each row
// to the comic named in the same row. Both nodes are looked up with MATCH, so
// they must already exist; a row whose hero or comic is not found yields no
// relationship.
LOAD CSV WITH HEADERS FROM "file:///heros_edge_list.csv" AS row
MATCH (h1:hero {hero: row.hero})
MATCH (c1:comic {comic: row.comic})
// NOTE(review): APOC documents this procedure as
// apoc.create.relationship(from, relType, props, to). Only two arguments are
// passed here, so the relationship type and property map look missing -- confirm.
CALL apoc.create.relationship(h1, c1) YIELD rel
// rel.noOp is never set in this script; removing it presumably only serves to
// close the query after CALL ... YIELD without returning rows -- confirm.
REMOVE rel.noOp;

## neo4j_start_docker_container.sh
# Start a Neo4j container publishing ports 7474 and 7687 (Neo4j's default HTTP
# and Bolt ports), with host directories under $HOME mounted for the database
# files, the CSV import folder, plugins and logs.
# The logs mount uses $HOME: shell variable names are case-sensitive, so the
# former $home expanded to an empty string and mounted /neo4j/logs instead.
# NOTE(review): NEO4J_AUTH carries a hard-coded password (neo4j/1234); fine for
# a local sandbox, replace it before using this anywhere shared.
docker run -p 7474:7474 -p 7687:7687 \
  --volume=$HOME/graph_data/data:/data \
  --volume=$HOME/graph_data/gameofthrones/data:/var/lib/neo4j/import \
  --volume=$HOME/graph_data/plugins:/var/lib/neo4j/plugins \
  --volume=$HOME/neo4j/logs:/var/lib/neo4j/logs \
  --env NEO4J_dbms_security_procedures_unrestricted=gds.\\\*,apoc.\\\* \
  --env apoc.import.file.enabled=true \
  --env NEO4J_AUTH=neo4j/1234 \
  neo4j:latest

## load_csv_nodes.cql
// Create or update one Character node per row of got-s1-nodes.csv: the node
// is keyed on the CSV "Id" column and its name is set from the "Label" column.
LOAD CSV WITH HEADERS FROM "file:///got-s1-nodes.csv" AS row
MERGE (c:Character {id: row.Id})
SET c.name = row.Label

## load_arxiv_data.py
# Path to the arXiv metadata snapshot; the loop below parses it as one JSON
# document per line.
file = "./arxiv-metadata-oai-snapshot.json"

# Parsed records, one entry per line read from the snapshot.
metadata = []

# Maximum number of records to load (100k for testing).
lines = 100000

# JSON text is UTF-8, so do not rely on the platform's default encoding.
with open(file, 'r', encoding='utf-8') as f:
    for line in tqdm(f):
        # Stop once the cap is reached. Previously `lines` was never consulted,
        # so the whole snapshot was loaded despite the testing limit.
        if len(metadata) >= lines:
            break
        metadata.append(json.loads(line))

## clean_author_and_category_lists.py
def get_author_list(line):
    """Return one display string per author entry in ``line``.

    Each entry is indexed positionally: element 1 comes first and element 0
    second, joined by a single space (presumably entries are
    ``[last, first, ...]`` -- confirm against the caller).
    """
    names = []
    for entry in line:
        names.append(entry[1] + ' ' + entry[0])
    return names


def get_category_list(line):
    """Split a row's category string on single spaces and return the pieces as a list."""
    pieces = line.split(" ")
    return pieces


## neo4j_python_connection_class.py
class Neo4jConnection:
    # Holds the connection settings for a Neo4j server together with the
    # driver object created from them.
    # NOTE(review): this snippet is cut off -- the `except` clause at the end
    # has no body and no further methods are visible here.

    def __init__(self, uri, user, pwd):
        # Keep the connection settings on the instance. The double-underscore
        # names are name-mangled by Python (e.g. _Neo4jConnection__uri).
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        # Remains None if creating the driver below raises.
        self.__driver = None
        try:
            # GraphDatabase is not imported in the visible code -- presumably
            # `from neo4j import GraphDatabase`; confirm.
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            # NOTE(review): the handler body is missing from this snippet.

## create_arxiv_constraints.py
# Uniqueness constraints for the arXiv graph: Paper.id, Author.name and
# Category.category must each be unique. IF NOT EXISTS makes re-running safe.
# The FOR ... REQUIRE form is used because the older ON ... ASSERT form was
# deprecated in Neo4j 4.4 and removed in Neo4j 5, where it is a syntax error.
conn.query('CREATE CONSTRAINT papers IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE')
conn.query('CREATE CONSTRAINT authors IF NOT EXISTS FOR (a:Author) REQUIRE a.name IS UNIQUE')
conn.query('CREATE CONSTRAINT categories IF NOT EXISTS FOR (c:Category) REQUIRE c.category IS UNIQUE')

## paper_nodes_and_edges.py
def add_papers(rows, batch_size=5000):
   # Adds paper nodes and (:Author)--(:Paper) and
   # (:Paper)--(:Category) relationships to the Neo4j graph as a
   # batch job.
   #
   # The visible part of the query unwinds the $rows parameter and merges one
   # (:Paper) node per row keyed on row.id, setting the title only when the
   # node is first created. Presumably `rows` is what gets bound to $rows and
   # `batch_size` (default 5000) is the number of rows sent per call -- confirm.
   # NOTE(review): this snippet is cut off inside the query string; the
   # relationship clauses and the code that runs the query are not shown.

   query = '''
   UNWIND $rows as row
   MERGE (p:Paper {id:row.id}) ON CREATE SET p.title = row.title

   // connect categories

## add_category_and_author_nodes.py
# Unique categories: take the category-list column, expand each list so every
# category gets its own row, then keep the first occurrence of each value.
categories = (
    df[['category_list']]
    .rename(columns={'category_list': 'category'})
    .explode('category')
    .drop_duplicates(subset=['category'])
)

# Unique authors, built the same way from the cleaned author-list column.
authors = (
    df[['cleaned_authors_list']]
    .rename(columns={'cleaned_authors_list': 'author'})
    .explode('author')
    .drop_duplicates(subset=['author'])
)
	import json
	import argparse
	import yaml
	from typing import Any, Dict

	import networkx as nx
	import pandas as pd
	from tqdm import tqdm
	import pygraphviz
	import graphviz
	// Read heros_edge_list.csv row by row and connect the hero named in each
	// row to the comic named in the same row. Both nodes are looked up with
	// MATCH, so they must already exist.
	LOAD CSV WITH HEADERS FROM "file:///heros_edge_list.csv" AS row
	MATCH (h1:hero {hero: row.hero})
	MATCH (c1:comic {comic: row.comic})
	// NOTE(review): APOC documents this procedure as
	// apoc.create.relationship(from, relType, props, to). Only two arguments
	// are passed here -- confirm the intended relationship type.
	CALL apoc.create.relationship(h1, c1) YIELD rel
	// rel.noOp is never set in this script; presumably this REMOVE only closes
	// the query without returning rows -- confirm.
	REMOVE rel.noOp;
	# Start a Neo4j container publishing ports 7474 and 7687, with host
	# directories under $HOME mounted for data, CSV import, plugins and logs.
	# Two fixes over the previous text:
	#   * the logs mount uses $HOME (shell variables are case-sensitive, so
	#     $home expanded to an empty string);
	#   * the `*` wildcards are restored in the unrestricted-procedures list;
	#     without them the value ended in a stray escaped space.
	# NOTE(review): NEO4J_AUTH carries a hard-coded password (neo4j/1234).
	docker run -p 7474:7474 -p 7687:7687 \
	  --volume=$HOME/graph_data/data:/data \
	  --volume=$HOME/graph_data/gameofthrones/data:/var/lib/neo4j/import \
	  --volume=$HOME/graph_data/plugins:/var/lib/neo4j/plugins \
	  --volume=$HOME/neo4j/logs:/var/lib/neo4j/logs \
	  --env NEO4J_dbms_security_procedures_unrestricted=gds.\\\*,apoc.\\\* \
	  --env apoc.import.file.enabled=true \
	  --env NEO4J_AUTH=neo4j/1234 \
	  neo4j:latest
	// Create or update one Character node per row of got-s1-nodes.csv, keyed
	// on the CSV "Id" column, and set its name from the "Label" column.
	LOAD CSV WITH HEADERS FROM "file:///got-s1-nodes.csv" AS row
	MERGE (c:Character {id: row.Id})
	SET c.name = row.Label
	# Path to the arXiv metadata snapshot; the loop below parses it as one JSON
	# document per line.
	file = "./arxiv-metadata-oai-snapshot.json"

	# Parsed records, one entry per line read from the snapshot.
	metadata = []

	# Maximum number of records to load (100k for testing).
	lines = 100000

	# Indentation under `with` and `for` is restored here (it had been
	# flattened, which is a syntax error). JSON text is UTF-8, so the encoding
	# is stated explicitly.
	with open(file, 'r', encoding='utf-8') as f:
	    for line in tqdm(f):
	        # Enforce the cap; `lines` was previously never consulted.
	        if len(metadata) >= lines:
	            break
	        metadata.append(json.loads(line))
	def get_author_list(line):
	    """Return one display string per author entry in ``line``.

	    Each entry is indexed positionally: element 1 comes first and element 0
	    second, joined by a single space (presumably entries are
	    ``[last, first, ...]`` -- confirm against the caller).
	    """
	    # The body is re-indented under the def; it had been flattened to the
	    # same level as the signature, which is a syntax error.
	    return [e[1] + ' ' + e[0] for e in line]


	def get_category_list(line):
	    """Split a row's category string on single spaces and return the pieces as a list."""
	    # The body is re-indented under the def; it had been flattened to the
	    # same level as the signature, which is a syntax error.
	    return line.split(" ")
	# Holds the connection settings for a Neo4j server together with the driver
	# object created from them.
	# NOTE(review): this copy has lost its nested indentation and is cut off --
	# the `except` clause at the end has no body and no further methods are
	# visible here.
	class Neo4jConnection:

	def __init__(self, uri, user, pwd):
	# Keep the connection settings on the instance.
	self.__uri = uri
	self.__user = user
	self.__pwd = pwd
	# Remains None if creating the driver below raises.
	self.__driver = None
	try:
	# GraphDatabase is not imported in the visible code -- presumably
	# `from neo4j import GraphDatabase`; confirm.
	self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
	except Exception as e:
	# NOTE(review): the handler body is missing from this snippet.
	# Uniqueness constraints for the arXiv graph: Paper.id, Author.name and
	# Category.category must each be unique. IF NOT EXISTS makes re-running safe.
	# The FOR ... REQUIRE form is used because the older ON ... ASSERT form was
	# deprecated in Neo4j 4.4 and removed in Neo4j 5, where it is a syntax error.
	conn.query('CREATE CONSTRAINT papers IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE')
	conn.query('CREATE CONSTRAINT authors IF NOT EXISTS FOR (a:Author) REQUIRE a.name IS UNIQUE')
	conn.query('CREATE CONSTRAINT categories IF NOT EXISTS FOR (c:Category) REQUIRE c.category IS UNIQUE')
	def add_papers(rows, batch_size=5000):
	# Adds paper nodes and (:Author)--(:Paper) and
	# (:Paper)--(:Category) relationships to the Neo4j graph as a
	# batch job.
	#
	# The visible part of the query unwinds the $rows parameter and merges one
	# (:Paper) node per row keyed on row.id, setting the title only when the
	# node is first created.
	# NOTE(review): this copy has lost its indentation and is cut off inside
	# the query string; the rest of the function is not shown.

	query = '''
	UNWIND $rows as row
	MERGE (p:Paper {id:row.id}) ON CREATE SET p.title = row.title

	// connect categories
	# Unique categories: take the category-list column, expand each list so
	# every category gets its own row, then keep the first occurrence of each.
	categories = (
	    df[['category_list']]
	    .rename(columns={'category_list': 'category'})
	    .explode('category')
	    .drop_duplicates(subset=['category'])
	)

	# Unique authors, built the same way from the cleaned author-list column.
	authors = (
	    df[['cleaned_authors_list']]
	    .rename(columns={'cleaned_authors_list': 'author'})
	    .explode('author')
	    .drop_duplicates(subset=['author'])
	)