View MortarToElk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
register '/Users/davidfauth/Downloads/elasticsearch-hadoop-1.3.0.M1.jar'; | |
define ESStorage org.elasticsearch.hadoop.pig.ESStorage('es.resource=fbo/awards'); | |
-- More code here | |
B = FOREACH joinedActiveDetails GENERATE | |
CONCAT(SUBSTRING(postedDate,0,10),'T12:30:00-05:00') as searchPostedDate, | |
classCode as searchClassCode, | |
naicsCode as searchNaicsCode, | |
agencyName as searchAgencyName, |
View mortarDocGraphEnhanced
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
filteredNPIData = FOREACH npiData GENERATE | |
REPLACE(NPI, '\\"','') as npiRX, | |
REPLACE(Provider_Business_Mailing_Address_State_Name, '\\"','') as NPIState, | |
REPLACE(Healthcare_Provider_Taxonomy_Code_1, '\\"','') as NPITaxonomy, | |
REPLACE(Provider_Organization_Name_Legal_Business_Name, '\\"','') as NPIOrgName, | |
CONCAT(CONCAT(REPLACE(Provider_First_Name,'\\"',''),' '), REPLACE(Provider_Last_Name_Legal_Name,'\\"','')) as docName; | |
--join the filtered set to the NPI data to get the NPI information | |
joinReferred = JOIN docGraphRXData BY referringDoc, filteredNPIData by npiRX; |
View sampleBitcoinTransaction.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"hash160":"a54e0ee6071328dc58c8c37a4e974c4816364f24", | |
"address":"1G541ENwQBqG3WZgvYtVCojVgdHFpJ8RXs", | |
"n_tx":25, | |
"total_received":3393980000, | |
"total_sent":3393980000, | |
"final_balance":0, | |
"txs":[{ | |
"ver":1, | |
"inputs":[ |
View bitcoinAddressDetails
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"hash160":"a54e0ee6071328dc58c8c37a4e974c4816364f24", | |
"address":"1G541ENwQBqG3WZgvYtVCojVgdHFpJ8RXs", | |
"n_tx":25, | |
"total_received":3393980000, | |
"total_sent":3393980000, | |
"final_balance":0, | |
"txs":[{ | |
"ver":1, | |
"inputs":[ |
View Neo4jHive.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.neo4j.hadoop.example; | |
import org.codehaus.jackson.map.ObjectMapper; | |
import org.neo4j.graphdb.*; | |
import org.neo4j.graphdb.schema.Schema; | |
import org.neo4j.helpers.collection.MapUtil; | |
import org.neo4j.tooling.GlobalGraphOperations; | |
import javax.ws.rs.GET; | |
import javax.ws.rs.POST; |
View mortarPitElasticsearch.pig
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 'Document' is the delimiter | |
-- 'event, gathering' is the tag list | |
%default OUTPUT_PATH '/Users/davidfauth/MortarBillsData' | |
REGISTER '/Users/davidfauth/mortarProjects/billsProject/udfs/python/billsProject.py' USING streaming_python AS nltk_udfs; | |
REGISTER '/Users/davidfauth/Downloads/elasticsearch-hadoop-1.3.0.M1.jar'; | |
define ESStorage org.elasticsearch.hadoop.pig.ESStorage('es.resource=govtrack/bills'); | |
bills = LOAD '/Users/davidfauth/MortarData/' |
View jsonSingleLine.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jsonFormatter; | |
import java.io.*; | |
import java.nio.file.FileVisitResult; | |
import java.nio.file.FileVisitor; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.nio.file.SimpleFileVisitor; | |
import java.nio.file.attribute.BasicFileAttributes; |
View nltk.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pig_util import outputSchema | |
import nltk | |
@outputSchema("top_five:bag{t:(bigram:chararray)}") | |
def top_5_bigrams(tweets): | |
tokenized_tweets = [ nltk.tokenize.WhitespaceTokenizer().tokenize(t[0]) for t in tweets ] | |
bgm = nltk.collocations.BigramAssocMeasures() | |
finder = nltk.collocations.BigramCollocationFinder.from_documents(tokenized_tweets) | |
top_5 = finder.nbest(bgm.likelihood_ratio, 5) |
View mortarNeo4JExample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 'Document' is the delimiter | |
-- 'event, gathering' is the tag list | |
%default OUTPUT_PATH '/Users/davidfauth/MortarBillsData' | |
%default S3_OUTPUT_PATH 's3n://df-bills-project' | |
%default S3_INPUT_PATH 's3n://df-bills-data' | |
%default INPUT_PATH '/Users/davidfauth/MortarNeoTestData' | |
%default BULK_INPUT_PATH '/Users/davidfauth/MortarTestDataBulk' | |
REGISTER '/Users/davidfauth/mortarProjects/billsProject/udfs/python/billsProject.py' USING streaming_python AS nltk_udfs; | |
REGISTER '/Users/davidfauth/mortarProjects/billsProject/udfs/python/utilities.py' USING streaming_python AS utility_udfs; |
View fbo_analysis_gist
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* FBO_Data | |
*/ | |
%default INPUT_PATH '/Users/davidfauth/fbo_data/fbo_data_active.csv' | |
%default INPUT_NEW_PATH '/Users/davidfauth/fbo_data/fbo_data_pig/fbo_data_archive_12_13_tab.txt' | |
%default INPUT_DATA_PATH '/Users/davidfauth/fbo_data/fbo_data_pig' | |
%default OUTPUT_PATH '/Users/davidfauth/MortarBillsData' | |
/** |
OlderNewer