This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pig -param orig=/user/bcolloran/data/fhrFullDump_2014-01-31/ -param fetchids=/tmp/sample_list.txt -param jointype=merge -param output=DEST_PATH fetch_reports.pig | |
register '/opt/cloudera/parcels/CDH/lib/pig/piggybank.jar'; | |
fulldump = LOAD '$orig' USING org.apache.pig.piggybank.storage.SequenceFileLoader AS (key:chararray, value:chararray); | |
ids_to_fetch_raw = LOAD '$fetchids' USING PigStorage() AS (key:chararray, ign:chararray); | |
ids_to_fetch = ORDER ids_to_fetch_raw BY key; | |
common = JOIN fulldump by key, ids_to_fetch by key USING '$jointype'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os | |
import sys | |
from datetime import date, timedelta, datetime | |
import simplejson as json | |
def parse(filex): | |
data = json.loads(filex.read(), 'utf8') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os, sys | |
import math | |
import happybase | |
import time | |
import simplejson as json | |
import struct | |
from datetime import datetime, timedelta |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys, os | |
import codecs | |
import datetime | |
import mrjob.job | |
import mrjob.protocol | |
import simplejson as json | |
import math |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import mechanize | |
import logging | |
import sys, os | |
USER = 'someuser@mozilla.com' | |
PASSWORD = 'somepassword' | |
CONFLUENCE_BASE_URL = "https://mana.mozilla.org" | |
CONFLUENCE_MANAGE_INDEX_URL = "https://mana.mozilla.org/wiki/admin/viewindexqueue.action" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
REGISTER 'socorro-toolbox-0.1-SNAPSHOT.jar' | |
REGISTER 'akela-0.6-SNAPSHOT.jar' | |
register 'jackson-core-2.0.6.jar' | |
register 'jackson-databind-2.0.6.jar' | |
register 'jackson-annotations-2.0.6.jar' | |
SET pig.logfile socorro-modulelist.log; | |
SET default_parallel 30; | |
SET mapred.compress.map.output false; | |
/* SET mapred.map.output.compression.codec org.apache.hadoop.io.compress.SnappyCodec; */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys | |
import org.apache.pig.tools.pigstats.PigStatusReporter as PigStatusReporter | |
import org.apache.pig.tools.counters.PigCounterHelper as PigCounterHelper | |
import org.apache.pig.impl.util.UDFContext as UDFContext | |
reporter = PigCounterHelper() | |
@outputSchema('modules:bag{t:tuple(filename:chararray,version:chararray,debug_file:chararray,debug_id:chararray,base_addr:chararray,max_addr:chararray)}') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2 | |
import sys | |
grouped = {} | |
def parse_line(linex): | |
parts = linex.split(' ', 9) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TODO | |
# | |
# [1] restrict to valid firefox versions | |
SELECT DATE(TIME_SLICE(adi.bl_date, 168, 'hour', 'start')) AS "Ping Date" , | |
adi.v_prod_major AS "Product Version" , | |
l.country_name AS "Country" , | |
adi.locale AS "Locale" , | |
adi.channel AS "Release Channel" , |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name total_usable_slots total_usable_disk net_price num_hosts per_node_price | |
----------- -------------------- ------------------- ----------- ----------- ---------------- | |
m2.4xlarge 42 11550 8.26 7 1.18 | |
m1.xlarge 40 33000 9.2 20 0.46 | |
d2.4xlarge 42 71910 10.44 3 3.48 | |
m2.2xlarge 40 16400 11.8 20 0.59 | |
d2.2xlarge 42 83790 12.18 7 1.74 | |
i2.4xlarge 42 9510 12.21 3 4.07 | |
d2.8xlarge 68 95940 13.92 2 6.96 | |
i2.2xlarge 42 10990 14.21 7 2.03 |
OlderNewer