This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
register '/Users/davidfauth/Downloads/elasticsearch-hadoop-1.3.0.M1.jar'; | |
define ESStorage org.elasticsearch.hadoop.pig.ESStorage('es.resource=fbo/awards'); | |
-- More code here | |
B = FOREACH joinedActiveDetails GENERATE | |
CONCAT(SUBSTRING(postedDate,0,10),'T12:30:00-05:00') as searchPostedDate, | |
classCode as searchClassCode, | |
naicsCode as searchNaicsCode, | |
agencyName as searchAgencyName, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
filteredNPIData = FOREACH npiData GENERATE | |
REPLACE(NPI, '\\"','') as npiRX, | |
REPLACE(Provider_Business_Mailing_Address_State_Name, '\\"','') as NPIState, | |
REPLACE(Healthcare_Provider_Taxonomy_Code_1, '\\"','') as NPITaxonomy, | |
REPLACE(Provider_Organization_Name_Legal_Business_Name, '\\"','') as NPIOrgName, | |
CONCAT(CONCAT(REPLACE(Provider_First_Name,'\\"',''),' '), REPLACE(Provider_Last_Name_Legal_Name,'\\"','')) as docName; | |
--join the filtered set to the NPI data to get the NPI information | |
joinReferred = JOIN docGraphRXData BY referringDoc, filteredNPIData by npiRX; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"hash160":"a54e0ee6071328dc58c8c37a4e974c4816364f24", | |
"address":"1G541ENwQBqG3WZgvYtVCojVgdHFpJ8RXs", | |
"n_tx":25, | |
"total_received":3393980000, | |
"total_sent":3393980000, | |
"final_balance":0, | |
"txs":[{ | |
"ver":1, | |
"inputs":[ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"hash160":"a54e0ee6071328dc58c8c37a4e974c4816364f24", | |
"address":"1G541ENwQBqG3WZgvYtVCojVgdHFpJ8RXs", | |
"n_tx":25, | |
"total_received":3393980000, | |
"total_sent":3393980000, | |
"final_balance":0, | |
"txs":[{ | |
"ver":1, | |
"inputs":[ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.neo4j.hadoop.example; | |
import org.codehaus.jackson.map.ObjectMapper; | |
import org.neo4j.graphdb.*; | |
import org.neo4j.graphdb.schema.Schema; | |
import org.neo4j.helpers.collection.MapUtil; | |
import org.neo4j.tooling.GlobalGraphOperations; | |
import javax.ws.rs.GET; | |
import javax.ws.rs.POST; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 'Document' is the delimiter | |
-- 'event, gathering' is the tag list | |
%default OUTPUT_PATH '/Users/davidfauth/MortarBillsData' | |
REGISTER '/Users/davidfauth/mortarProjects/billsProject/udfs/python/billsProject.py' USING streaming_python AS nltk_udfs; | |
REGISTER '/Users/davidfauth/Downloads/elasticsearch-hadoop-1.3.0.M1.jar'; | |
define ESStorage org.elasticsearch.hadoop.pig.ESStorage('es.resource=govtrack/bills'); | |
bills = LOAD '/Users/davidfauth/MortarData/' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package jsonFormatter; | |
import java.io.*; | |
import java.nio.file.FileVisitResult; | |
import java.nio.file.FileVisitor; | |
import java.nio.file.Files; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.nio.file.SimpleFileVisitor; | |
import java.nio.file.attribute.BasicFileAttributes; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pig_util import outputSchema | |
import nltk | |
@outputSchema("top_five:bag{t:(bigram:chararray)}") | |
def top_5_bigrams(tweets): | |
tokenized_tweets = [ nltk.tokenize.WhitespaceTokenizer().tokenize(t[0]) for t in tweets ] | |
bgm = nltk.collocations.BigramAssocMeasures() | |
finder = nltk.collocations.BigramCollocationFinder.from_documents(tokenized_tweets) | |
top_5 = finder.nbest(bgm.likelihood_ratio, 5) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- 'Document' is the delimiter | |
-- 'event, gathering' is the tag list | |
%default OUTPUT_PATH '/Users/davidfauth/MortarBillsData' | |
%default S3_OUTPUT_PATH 's3n://df-bills-project' | |
%default S3_INPUT_PATH 's3n://df-bills-data' | |
%default INPUT_PATH '/Users/davidfauth/MortarNeoTestData' | |
%default BULK_INPUT_PATH '/Users/davidfauth/MortarTestDataBulk' | |
REGISTER '/Users/davidfauth/mortarProjects/billsProject/udfs/python/billsProject.py' USING streaming_python AS nltk_udfs; | |
REGISTER '/Users/davidfauth/mortarProjects/billsProject/udfs/python/utilities.py' USING streaming_python AS utility_udfs; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* FBO_Data | |
*/ | |
%default INPUT_PATH '/Users/davidfauth/fbo_data/fbo_data_active.csv' | |
%default INPUT_NEW_PATH '/Users/davidfauth/fbo_data/fbo_data_pig/fbo_data_archive_12_13_tab.txt' | |
%default INPUT_DATA_PATH '/Users/davidfauth/fbo_data/fbo_data_pig' | |
%default OUTPUT_PATH '/Users/davidfauth/MortarBillsData' | |
/** |
OlderNewer