Skip to content

Instantly share code, notes, and snippets.

@andry1
Created July 2, 2013 22:20
Show Gist options
  • Save andry1/5913776 to your computer and use it in GitHub Desktop.
Save andry1/5913776 to your computer and use it in GitHub Desktop.
-- Export one hourly partition of impression rows, enriched with site/zone,
-- DMA, country and state lookups, to a delimited file on HDFS.
--
-- The partition to process is parameterized. The defaults below reproduce the
-- values that were previously hard-coded, so running the script with no
-- parameters behaves exactly as before. To process another hour, pass e.g.
--   -param YEAR=2013 -param MONTH=06 -param DAY=20 -param HOUR=00
%default YEAR '2013';
%default MONTH '06';
%default DAY '19';
%default HOUR '20';

-- Job settings: Snappy block compression for job output, map-side partial
-- aggregation switched on, and the job submitted to the testing queue.
SET mapreduce.output.fileoutputformat.compress true
SET mapreduce.output.fileoutputformat.compress.codec org.apache.hadoop.io.compress.SnappyCodec
SET mapreduce.output.fileoutputformat.compress.type BLOCK
SET pig.exec.mapPartAgg true
SET mapreduce.job.queuename testing

-- Impression table read through HCatalog. The partition columns are compared
-- as strings, so parameter values must keep their zero padding (06, not 6).
xfp_all = LOAD 'chris_295640.impression' USING org.apache.hcatalog.pig.HCatLoader();
xfp_partition = FILTER xfp_all BY year=='$YEAR' AND month=='$MONTH' AND day=='$DAY' AND hour=='$HOUR';

-- Drop rows with lineitemid 0 and rows whose time column is the literal text Time.
-- NOTE(review): the second condition presumably removes embedded header lines - confirm.
xfp = FILTER xfp_partition BY lineitemid != 0 AND time != 'Time';

-- Lookup tables. Each is used below as the right-hand side of a replicated
-- join, which requires it to be small enough to be held in memory.
spm = LOAD 'chris_295640.site_page_mapping' USING org.apache.hcatalog.pig.HCatLoader();
dma = LOAD 'google_295640.designated_market_areas' USING org.apache.hcatalog.pig.HCatLoader();
countries = LOAD 'google_295640.countries' USING org.apache.hcatalog.pig.HCatLoader();
states = LOAD 'google_295640.states' USING org.apache.hcatalog.pig.HCatLoader();

-- Inner join: impressions whose adunitid has no matching zone_source_id are dropped.
xfp_join1 = JOIN xfp BY adunitid, spm BY zone_source_id USING 'replicated';

-- The remaining joins are LEFT OUTER, so unmatched impressions are kept with
-- null lookup columns, which the projection below replaces with defaults.
-- NOTE(review): metroid appears to be the DMA code offset by 200000 - confirm.
xfp_join2 = JOIN xfp_join1 BY (metroid - 200000) LEFT OUTER, dma BY code USING 'replicated';
xfp_join3 = JOIN xfp_join2 BY countryid LEFT OUTER, countries BY xfp_id USING 'replicated';
xfp_join4 = JOIN xfp_join3 BY regionid LEFT OUTER, states BY xfp_id USING 'replicated';

-- Output projection. Column order is the output file layout and must not change.
--   * the third column is always the constant 0.0.0.0
--   * missing country, state and DMA lookups are written as the string 0
--   * bandwidthgroupid values 1 to 5 are shifted down to 0 to 4, anything else becomes 0
xfp_out = FOREACH xfp_join4 GENERATE time,userid,'0.0.0.0',lineitemid,creativeid,creativeversion,xfp_join1::spm::site_source_id,xfp_join1::spm::zone_source_id,
targetedcustomcriteria,
(xfp_join3::countries::source_id IS NULL ? '0' : xfp_join3::countries::source_id),
(states::abbr IS NULL ? '0' : states::abbr),
browserid,
(xfp_join2::dma::source_id IS NULL ? '0' : xfp_join2::dma::source_id),
((bandwidthgroupid >= 1 AND bandwidthgroupid <= 5) ? bandwidthgroupid - 1 : 0),
customtargeting, postalcode;

-- Write the result with PigStorage, using the hex-escaped delimiter FE.
-- The delimiter and option strings are kept exactly as in the original script.
STORE xfp_out INTO '/user/chris/xfp_pig' USING PigStorage('\\xFE','noschema');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment