Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Pig code for live animals award description
/*
 * Keep only award notices for live-animal procurements that carry an award amount.
 * The same three-part predicate is applied to every source relation (the active
 * set plus each fyNN_NN relation, presumably fiscal-year ranges):
 *   - noticeType equals the Award Notice literal
 *   - contractAwardAmount is not null
 *   - classCode equals the class 88 (Live animals) literal
 * NOTE(review): this comment previously read "filter awards by NIH", but no
 * condition here restricts by agency. Confirm whether an agency filter was intended.
 * The input relations are not defined in this snippet and are presumably loaded
 * earlier in the script.
 */
activeHasAward = FILTER active_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy0004HasAward = FILTER fy00_04_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy0507HasAward = FILTER fy05_07_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy0809HasAward = FILTER fy08_09_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy1011HasAward = FILTER fy10_11_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
fy1213HasAward = FILTER fy12_13_data BY (noticeType == 'Award Notice' AND contractAwardAmount IS NOT NULL AND classCode == '88 -- Live animals');
/*
 * Combine the per-source relations into two relations with UNION:
 *   allAwardData  - all filtered award notices from the statements above
 *   allDescrData  - all description relations
 * This is a concatenation of rows, not a GROUP operation.
 * The description relations are not defined in this snippet and are presumably
 * built earlier in the script.
 * NOTE(review): UNION assumes the inputs share a compatible schema. Confirm that
 * all six sources on each side are loaded with the same field layout.
 */
allAwardData = UNION activeHasAward, fy0004HasAward, fy0507HasAward, fy0809HasAward, fy1011HasAward, fy1213HasAward;
allDescrData = UNION active_descr, fy0004descr, fy0507descr, fy0809descr, fy1011descr, fy1213descr;
/*
 * Inner-join descriptions to award notices. The key on the description side is
 * awardID and the key on the award side is noticeID, so only records present in
 * both relations survive.
 * NOTE(review): later statements reference joined fields without a relation
 * prefix, which presumably relies on no field name being shared by both inputs.
 * Confirm against the schemas of the two relations.
 */
joinedLADetails = JOIN allDescrData BY awardID, allAwardData BY noticeID;
/*
 * Step 1 (relation B): project the award fields of interest and pass the raw
 * description through nltk_udfs.stripHTML, naming the result fboDescr. Per its
 * name and the original comment, the UDF removes HTML tagging from the text.
 * The UDF itself is defined outside this snippet.
 *
 * Step 2 (relation liveAnimalResults): emit the same award fields, the output of
 * nltk_udfs.top5_bigrams applied to the cleaned description, and the cleaned
 * description itself.
 * NOTE(review): the top5_bigrams column has no AS alias, so downstream code can
 * only reference it by position. Consider naming it if this relation is reused.
 * No STORE or DUMP of liveAnimalResults is visible here, so it is presumably
 * written out later.
 */
B = FOREACH joinedLADetails GENERATE awardID, agencyName, awardee, contractAwardAmount, nltk_udfs.stripHTML(description) AS fboDescr;
liveAnimalResults = FOREACH B GENERATE
awardID, agencyName, awardee, contractAwardAmount,nltk_udfs.top5_bigrams(fboDescr), fboDescr;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment