# Upload: copy a file from the local filesystem into HDFS (local -> cluster).
hdfs dfs -copyFromLocal file hdfs://path/to/dir/file
# Download: copy a file from HDFS back to the local filesystem (cluster -> local).
# Note the argument order is reversed relative to the upload: HDFS source first.
hdfs dfs -copyToLocal hdfs://path/to/dir/file file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DELETE A TABLE IN THE HIVE METASTORE | |
# BE CAREFUL! BACKUP THE DB BEFORE PROCEEDING! | |
-- Name of the Hive table whose metastore rows should be removed.
-- Fill this in before running; an empty string matches nothing.
set @table_name = ''; | |
-- Look up the table's numeric id and stash it in @tbl_id for the DELETE
-- statements that follow. (The mixed-case `TBl_ID` works because MySQL
-- column names are case-insensitive, but it looks like a typo for TBL_ID.)
SELECT @tbl_id := TBl_ID FROM TBLS WHERE TBL_NAME = @table_name; | |
-- Delete partition key vals | |
DELETE pvk | |
FROM PARTITION_KEY_VALS pvk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example 32-bit numeric IPv4 address (decodes to 66.168.224.0).
example1 = 1118363648


def numeric_to_ip(ip):
    """Convert a 32-bit integer to dotted-quad IPv4 notation.

    Always emits four octets. The previous implementation shifted until
    the value reached zero, which dropped leading zero octets: it
    returned ``'1'`` for 1 and ``''`` for 0 instead of ``'0.0.0.1'``
    and ``'0.0.0.0'``.

    :param ip: non-negative integer IPv4 address (0 .. 2**32 - 1)
    :return: dotted-quad string such as ``'66.168.224.0'``
    """
    # Extract each octet from most-significant to least-significant.
    return '.'.join(str((ip >> shift) & 255) for shift in (24, 16, 8, 0))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import time | |
logger = logging.getLogger(__name__) | |
class NoSuchActivityError(Exception):
    """Raised when a requested activity cannot be found.

    The docstring doubles as the class body, so the redundant ``pass``
    statement is no longer needed.
    """
- Install hadoop to get the required jars (
brew install hadoop
) - Create a spark-env.sh (
cp /usr/local/Cellar/apache-spark/1.6.1/libexec/conf/spark-env.sh.template /usr/local/Cellar/apache-spark/1.6.1/libexec/conf/spark-env.sh
) - Set HADOOP_CONF_DIR in spark-env.sh (
export HADOOP_CONF_DIR=/usr/local/Cellar/hadoop/2.7.2/libexec/etc/hadoop/
) - Add the required jars to the
SPARK_CLASSPATH
in spark-env.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Requires aws and spark (the sparkline charting tool, not apache spark) | |
brew install awscli spark | |
""" | |
import argparse | |
import datetime as dt | |
import json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import operator as op | |
# g = nested_getter(1, 1, 1) | |
# g((0, (0, (0, 1)))) -> 1 | |
def nested_getter(*args): | |
def getter(seq): | |
for func in (op.itemgetter(a) for a in args): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
# http://docs.aws.amazon.com/ElasticMapReduce/latest/ReleaseGuide/emr-spark-submit-step.html#dynamic-configuration | |
# https://github.com/grafke/Drone-workflow-controller/blob/7f40968f4164aede4e67070f5a4c0894dcc6d776/drone/actions/emr_launcher.py | |
# https://boto3.readthedocs.org/en/latest/reference/services/emr.html#EMR.Client.run_job_flow | |
# http://docs.aws.amazon.com/ElasticMapReduce/latest/ReleaseGuide/emr-spark-submit-step.html#dynamic-configuration | |
INSTANCE_CONFIG = { | |
'InstanceGroups': [ | |
{ |