Skip to content

Instantly share code, notes, and snippets.

@meyarivan
meyarivan / Hive+S3 Tuning
Last active May 4, 2016 12:15
Hive + S3 tuning
Tunables:
- mapreduce.input.fileinputformat.list-status.num-threads (hive-site.xml)
- fs.s3a.block.size
JIRAs:
- https://issues.apache.org/jira/browse/HADOOP-12810
- https://issues.apache.org/jira/browse/HADOOP-9565
- https://issues.apache.org/jira/browse/HADOOP-12878
#!/bin/bash
# Dump the `DESC FORMATTED` output of every table in every Hive database
# into one file per table, named desc.<database>.<table>, written to the
# current working directory.
#
# The command substitutions are intentionally left unquoted: both loops rely
# on word splitting to iterate over the names printed by hive.
for DB in $(hive -e "SHOW DATABASES;"); do
    for tbl in $(hive -e "USE ${DB}; SHOW TABLES"); do
        # Quote the redirect target so the name is used literally (no
        # globbing, no "ambiguous redirect" on unusual names).
        hive -e "DESC FORMATTED ${DB}.${tbl};" > "desc.${DB}.${tbl}"
    done
done
@meyarivan
meyarivan / mysql_mariadb.py
Created August 7, 2015 13:11
mysql_mariadb.py
# stdlib
import subprocess
import os
import sys
import re
import traceback
# project
from checks import AgentCheck
from utils.platform import Platform
@meyarivan
meyarivan / stream.py
Created June 9, 2015 22:57
Decode Kafka messages submitted by Mypipe
#!/usr/bin/env python2
from __future__ import print_function
import os, sys
from kafka import KafkaConsumer
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
name total_usable_slots total_usable_disk net_price num_hosts per_node_price
----------- -------------------- ------------------- ----------- ----------- ----------------
m2.4xlarge 42 11550 8.26 7 1.18
m1.xlarge 40 33000 9.2 20 0.46
d2.4xlarge 42 71910 10.44 3 3.48
m2.2xlarge 40 16400 11.8 20 0.59
d2.2xlarge 42 83790 12.18 7 1.74
i2.4xlarge 42 9510 12.21 3 4.07
d2.8xlarge 68 95940 13.92 2 6.96
i2.2xlarge 42 10990 14.21 7 2.03
# TODO
#
# [1] Restrict to valid Firefox versions
SELECT DATE(TIME_SLICE(adi.bl_date, 168, 'hour', 'start')) AS "Ping Date" ,
adi.v_prod_major AS "Product Version" ,
l.country_name AS "Country" ,
adi.locale AS "Locale" ,
adi.channel AS "Release Channel" ,
#!/usr/bin/env python2
import sys
grouped = {}
def parse_line(linex):
parts = linex.split(' ', 9)
import os, sys
import org.apache.pig.tools.pigstats.PigStatusReporter as PigStatusReporter
import org.apache.pig.tools.counters.PigCounterHelper as PigCounterHelper
import org.apache.pig.impl.util.UDFContext as UDFContext
reporter = PigCounterHelper()
@outputSchema('modules:bag{t:tuple(filename:chararray,version:chararray,debug_file:chararray,debug_id:chararray,base_addr:chararray,max_addr:chararray)}')
REGISTER 'socorro-toolbox-0.1-SNAPSHOT.jar'
REGISTER 'akela-0.6-SNAPSHOT.jar'
register 'jackson-core-2.0.6.jar'
register 'jackson-databind-2.0.6.jar'
register 'jackson-annotations-2.0.6.jar'
SET pig.logfile socorro-modulelist.log;
SET default_parallel 30;
SET mapred.compress.map.output false;
/* SET mapred.map.output.compression.codec org.apache.hadoop.io.compress.SnappyCodec; */
#!/usr/bin/env python
import mechanize
import logging
import sys, os
USER = 'someuser@mozilla.com'
PASSWORD = 'somepassword'
CONFLUENCE_BASE_URL = "https://mana.mozilla.org"
CONFLUENCE_MANAGE_INDEX_URL = "https://mana.mozilla.org/wiki/admin/viewindexqueue.action"