Skip to content

Instantly share code, notes, and snippets.

@davidfauth
davidfauth / singleLineJson.java
Created Jan 9, 2014
convert multi-line json to single line json
View singleLineJson.java
package jsonFormatter;
import java.io.*;
import java.nio.file.FileVisitResult;
import java.nio.file.FileVisitor;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
@davidfauth
davidfauth / pythonBiGram.py
Created Jan 21, 2014
Python utility to Tokenize data and write out the top-5 bigrams
View pythonBiGram.py
@outputSchema("top_five:bag{t:(bigram:chararray)}")
def top5_bigrams(textDescription):
    """Return up to five bigrams from ``textDescription``, ranked by likelihood ratio.

    The text is split into sentences, each sentence is split into word
    tokens, and a bigram collocation finder is built over those token lists.

    Returns a list of one-element tuples, each holding the two words of a
    bigram joined by a single space, matching the declared output schema.
    """
    # One token list per sentence, in sentence order.
    tokenized_sentences = []
    for sentence in nltk.tokenize.sent_tokenize(textDescription):
        tokenized_sentences.append(nltk.tokenize.word_tokenize(sentence))

    measures = nltk.collocations.BigramAssocMeasures()
    collocation_finder = nltk.collocations.BigramCollocationFinder.from_documents(
        tokenized_sentences
    )
    best_pairs = collocation_finder.nbest(measures.likelihood_ratio, 5)

    # Wrap each joined bigram in a 1-tuple so it maps onto the bag's tuple type.
    bigram_rows = []
    for pair in best_pairs:
        bigram_rows.append(("%s %s" % (pair[0], pair[1]),))
    return bigram_rows
@davidfauth
davidfauth / FBOPigLiveAnimalNLTK.pig
Created Jan 21, 2014
Pig code for live animals award description
View FBOPigLiveAnimalNLTK.pig
/*
 * For each input relation, keep only award notices that have a contract
 * award amount and whose class code is '88 -- Live animals'.
 * The same predicate is applied to every relation; no agency filter is
 * applied here.
 */
activeHasAward = FILTER active_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy0004HasAward = FILTER fy00_04_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy0507HasAward = FILTER fy05_07_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy0809HasAward = FILTER fy08_09_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy1011HasAward = FILTER fy10_11_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy1213HasAward = FILTER fy12_13_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
/*group Data */
@davidfauth
davidfauth / imageIntoNeo4J.java
Created Sep 16, 2014
Convert image to base64 and drop into Neo4J
View imageIntoNeo4J.java
package pe.archty;
/**
* Created by davidfauth on 9/16/14.
*/
import java.io.IOException;
import sun.misc.BASE64Encoder;
import sun.misc.BASE64Decoder;
import java.io.ByteArrayInputStream;
View pythonNeo4jUberH3
import simplejson
from urllib.request import urlopen
import urllib.request
import urllib.parse
from neo4j.v1 import GraphDatabase
import time
# Bolt URI of the Neo4j server on localhost, port 7687.
uri = "bolt://localhost:7687"
# Create the driver with user/password auth.
# NOTE(review): the password literal looks like a placeholder — confirm real
# credentials are supplied before running.
driver = GraphDatabase.driver(uri, auth=("neo4j", "changeYourPassword"))
View kafkaNeo4j_Cluster
### Neo4j.conf
kafka.zookeeper.connect=172.xx.xx.xx:2181
kafka.bootstrap.servers=172.xx.xx.xx:9092
streams.sink.enabled=true
streams.sink.polling.interval=1000
streams.sink.topic.cypher.Neo4jPersonTest=MERGE (p:Person{name: event.name, surname: event.surname}) MERGE (f:Family{name: event.surname}) MERGE (p)-[:BELONGS_TO]->(f)
streams.sink.topic.cypher.blogpost_maxmind_albums=FOREACH(ignoreMe IN CASE WHEN event.type='insert' THEN [1] ELSE [] END | MERGE (c:Album{id:event.data.id}) on match set c.name = event.data.name on create set c.name = event.data.name) FOREACH(ignoreMe IN CASE WHEN event.type='delete' THEN [1] ELSE [] END | MERGE (c:Album{id:event.data.id}) detach delete c) FOREACH(ignoreMe IN CASE WHEN event.type='update' THEN [1] ELSE [] END | MERGE (c:Album{id:event.data.id}) set c.name=event.data.name, c.genre=event.data.genre)
streams.sink.topic.cypher.blogpost_maxmind_users=FOREACH(ignoreMe IN CASE WHEN event.type='insert' THEN [1] ELSE [] END | MERGE (u:User{id:event.data.id}) on match set u.nam
View rbac.cql
CREATE DATABASE library;
CREATE DATABASE c360;
CREATE DATABASE iam;
SHOW DATABASES;
:use system;
START DATABASE library;
START DATABASE c360;
View Neo4jDriverWriteTest.java
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import static java.util.concurrent.TimeUnit.SECONDS;
import java.time.Duration;
import java.time.Instant;
@davidfauth
davidfauth / Neo4j_4_0_DriverWriteTest
Created Aug 26, 2019
Neo4jDriverWrite_40Cluster
View Neo4j_4_0_DriverWriteTest
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.logging.Level;
import static java.util.concurrent.TimeUnit.SECONDS;
import java.time.Duration;
import java.time.Instant;
View Neo4j-Databricks-Example.txt
#Step 1
%python
#write a file to DBFS using Python I/O APIs
# Write a header row followed by 500 generated rows to the test CSV.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open("/dbfs/tmp/neo4_test.csv", 'w') as f:
    f.write("id,name,emp_id,employer\n")
    for x in range(500):
        # Row layout matches the header: id, name_<id>, emp_id, emp_name_<id>.
        f.write(str(x) + ",name_" + str(x) + "," + str(x) + ",emp_name_" + str(x) + "\n")
#Step 2 Load to Dataframe