Skip to content

Instantly share code, notes, and snippets.

@davidfauth
davidfauth / SparkToNeo4j.html
Created October 8, 2019 12:46
Neo4j Spark Notebook
This file has been truncated, but you can view the full file.
<!DOCTYPE html>
<html>
<head>
<meta name="databricks-html-version" content="1">
<title>Neo4j Chicago Crime Notebook - Databricks</title>
<meta charset="utf-8">
<meta name="google" content="notranslate">
<meta name="robots" content="nofollow">
<meta http-equiv="Content-Language" content="en">
@davidfauth
davidfauth / Neo4j-Databricks-Example.txt
Last active September 25, 2019 15:36
Spark to Neo4j example
#Step 1
%python
# Write a 500-row sample CSV to DBFS using Python's local file I/O APIs.
with open("/dbfs/tmp/neo4_test.csv", 'w') as f:
    # Header row, followed by one row per index; the index is reused for both
    # the id and emp_id columns.
    f.write("id,name,emp_id,employer\n")
    for x in range(500):
        f.write("{0},name_{0},{0},emp_name_{0}\n".format(x))
# No explicit f.close() is needed: the with-statement closes the file on exit.
#Step 2 Load to Dataframe
@davidfauth
davidfauth / Neo4j_4_0_DriverWriteTest
Created August 26, 2019 15:13
Neo4jDriverWrite_40Cluster
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import static java.util.concurrent.TimeUnit.SECONDS;
import java.time.Duration;
import java.time.Instant;
@davidfauth
davidfauth / Neo4jDriverWriteTest.java
Created August 23, 2019 21:39
neo4j 4.0 example
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import static java.util.concurrent.TimeUnit.SECONDS;
import java.time.Duration;
import java.time.Instant;
@davidfauth
davidfauth / rbac.cql
Created August 19, 2019 11:54
Neo4j 4.0 RBAC
// Create three databases (library, c360, iam), list all databases, then switch
// to the system database and issue START for library and c360.
// NOTE(review): the CREATE/SHOW statements appear before `:use system`. Neo4j 4.x
// documents database administration commands as running against the system
// database -- confirm the session is already connected to it at this point.
CREATE DATABASE library;
CREATE DATABASE c360;
CREATE DATABASE iam;
SHOW DATABASES;
// NOTE(review): `:use` looks like a client-side (cypher-shell / Browser) command
// rather than a Cypher statement -- confirm the target client accepts it here.
:use system;
START DATABASE library;
START DATABASE c360;
@davidfauth
davidfauth / kafkaNeo4j_Cluster
Created July 5, 2019 18:36
Neo4j Kafka Cluster
### Neo4j.conf
kafka.zookeeper.connect=172.xx.xx.xx:2181
kafka.bootstrap.servers=172.xx.xx.xx:9092
streams.sink.enabled=true
streams.sink.polling.interval=1000
streams.sink.topic.cypher.Neo4jPersonTest=MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)
streams.sink.topic.cypher.blogpost_maxmind_albums=FOREACH(ignoreMe IN CASE WHEN event.type='insert' THEN [1] ELSE [] END | MERGE (c:Album{id:event.data.id}) on match set c.name = event.data.name on create set c.name = event.data.name) FOREACH(ignoreMe IN CASE WHEN event.type='delete' THEN [1] ELSE [] END | MERGE (c:Album{id:event.data.id}) detach delete c) FOREACH(ignoreMe IN CASE WHEN event.type='update' THEN [1] ELSE [] END | MERGE (c:Album{id:event.data.id}) set c.name=event.data.name, c.genre=event.data.genre)
streams.sink.topic.cypher.blogpost_maxmind_users=FOREACH(ignoreMe IN CASE WHEN event.type='insert' THEN [1] ELSE [] END | MERGE (u:User{id:event.data.id}) on match set u.nam
import simplejson
from urllib.request import urlopen
import urllib.request
import urllib.parse
from neo4j.v1 import GraphDatabase
import time
# Bolt URI of the Neo4j instance to connect to (localhost here).
uri = "bolt://localhost:7687"
# Create the driver with (user, password) auth.
# NOTE(review): the password literal looks like a placeholder -- replace it with
# real credentials, ideally loaded from configuration rather than hard-coded.
driver = GraphDatabase.driver(uri, auth=("neo4j", "changeYourPassword"))
@davidfauth
davidfauth / imageIntoNeo4J.java
Created September 16, 2014 18:28
Convert image to base64 and drop into Neo4J
package pe.archty;
/**
* Created by davidfauth on 9/16/14.
*/
import java.io.IOException;
import sun.misc.BASE64Encoder;
import sun.misc.BASE64Decoder;
import java.io.ByteArrayInputStream;
@davidfauth
davidfauth / FBOPigLiveAnimalNLTK.pig
Created January 21, 2014 15:46
Pig code for live animals award description
/*
 * Keep only award notices for class code '88 -- Live animals' that carry a
 * non-null contract award amount. The same predicate is applied to each of
 * the six source relations (active data plus the fiscal-year ranges).
 */
activeHasAward = FILTER active_data BY (
    noticeType == 'Award Notice'
    AND contractAwardAmount IS NOT NULL
    AND classCode == '88 -- Live animals'
);

fy0004HasAward = FILTER fy00_04_data BY (
    noticeType == 'Award Notice'
    AND contractAwardAmount IS NOT NULL
    AND classCode == '88 -- Live animals'
);

fy0507HasAward = FILTER fy05_07_data BY (
    noticeType == 'Award Notice'
    AND contractAwardAmount IS NOT NULL
    AND classCode == '88 -- Live animals'
);

fy0809HasAward = FILTER fy08_09_data BY (
    noticeType == 'Award Notice'
    AND contractAwardAmount IS NOT NULL
    AND classCode == '88 -- Live animals'
);

fy1011HasAward = FILTER fy10_11_data BY (
    noticeType == 'Award Notice'
    AND contractAwardAmount IS NOT NULL
    AND classCode == '88 -- Live animals'
);

fy1213HasAward = FILTER fy12_13_data BY (
    noticeType == 'Award Notice'
    AND contractAwardAmount IS NOT NULL
    AND classCode == '88 -- Live animals'
);
/*group Data */
@davidfauth
davidfauth / pythonBiGram.py
Created January 21, 2014 15:51
Python utility to Tokenize data and write out the top-5 bigrams
@outputSchema("top_five:bag{t:(bigram:chararray)}")
def top5_bigrams(textDescription):
    """Return up to five top-ranked bigrams found in ``textDescription``.

    The text is split into sentences, each sentence into word tokens, and the
    bigrams across all sentences are ranked by likelihood ratio.

    Args:
        textDescription: free-text description to analyse; may be None or
            empty.

    Returns:
        A list of 1-tuples, each holding one "word1 word2" string, matching
        the declared output schema (a bag of single-chararray tuples). The
        list is empty when the description is None or empty.
    """
    # A null field reaches the UDF as None; return an empty bag rather than
    # letting the tokenizer raise and fail the whole job on one bad row.
    if not textDescription:
        return []

    sentences = nltk.tokenize.sent_tokenize(textDescription)
    tokens = [nltk.tokenize.word_tokenize(s) for s in sentences]
    bgm = nltk.collocations.BigramAssocMeasures()
    finder = nltk.collocations.BigramCollocationFinder.from_documents(tokens)
    top_5 = finder.nbest(bgm.likelihood_ratio, 5)
    # Each bigram is a (word1, word2) pair; wrap the joined string in a
    # 1-tuple so every element is a tuple with a single chararray field.
    return [("%s %s" % (first, second),) for first, second in top_5]