Skip to content

Instantly share code, notes, and snippets.

@griggheo
griggheo / gist:2698152
Created May 14, 2012 23:51
DynamoDB BatchWriteItem in boto
import os
import sys
import subprocess
import re
import optparse
import boto
# Open a DynamoDB connection through boto and fetch the table handle.
# Credentials come from the standard AWS environment variables when they are
# set, so real keys never have to be written into this file; the original
# placeholder strings are kept as fallbacks so behaviour is unchanged when the
# variables are absent.
dynamodb_conn = boto.connect_dynamodb(
    aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID', 'MY_ACCESS_KEY_ID'),
    aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY', 'MY_SECRET_ACCESS_KEY')
)
# Name of the DynamoDB table to look up on the connection above.
table_name = 'mytable'
dynamodb_table = dynamodb_conn.get_table(table_name)
@griggheo
griggheo / nginx_time_conversions.pig
Created February 9, 2012 17:11
Pig script for Nginx access log date/time conversions
-- Register the piggybank jar (referenced through a file: URL) so its UDF classes can be used in this script.
REGISTER file:/home/hadoop/lib/pig/piggybank.jar;
-- Short aliases for piggybank UDFs, so they can be invoked without their full package path.
-- String helper:
DEFINE EXTRACT org.apache.pig.piggybank.evaluation.string.EXTRACT();
-- Date/time conversion helpers:
DEFINE CustomFormatToISO org.apache.pig.piggybank.evaluation.datetime.convert.CustomFormatToISO();
DEFINE ISOToUnix org.apache.pig.piggybank.evaluation.datetime.convert.ISOToUnix();
DEFINE DATE_TIME org.apache.pig.piggybank.evaluation.datetime.DATE_TIME();
DEFINE FORMAT_DT org.apache.pig.piggybank.evaluation.datetime.FORMAT_DT();
-- String formatting helper:
DEFINE FORMAT org.apache.pig.piggybank.evaluation.string.FORMAT();
-- Load the location supplied through the $INPUT parameter, exposing one chararray field named `line`.
-- No USING clause is given, so Pig's default load function applies.
-- NOTE(review): the default loader splits on tabs; confirm the input lines contain none, otherwise `line` holds only the first field.
RAW_LOGS = LOAD '$INPUT' as (line:chararray);
@griggheo
griggheo / mail_sendtime.pig
Created February 9, 2012 16:26
Pig script for analyzing mail sending times
-- Register the piggybank jar (referenced through a file: URL) so its UDF classes can be used in this script.
REGISTER file:/home/hadoop/lib/pig/piggybank.jar;
-- Short aliases for piggybank UDFs, so they can be invoked without their full package path.
-- String helper:
DEFINE EXTRACT org.apache.pig.piggybank.evaluation.string.EXTRACT();
-- Date/time conversion helpers:
DEFINE CustomFormatToISO org.apache.pig.piggybank.evaluation.datetime.convert.CustomFormatToISO();
DEFINE ISOToUnix org.apache.pig.piggybank.evaluation.datetime.convert.ISOToUnix();
DEFINE DATE_TIME org.apache.pig.piggybank.evaluation.datetime.DATE_TIME();
DEFINE FORMAT_DT org.apache.pig.piggybank.evaluation.datetime.FORMAT_DT();
@griggheo
griggheo / gist:1340429
Created November 4, 2011 20:43
EMR automation
#!/bin/bash
# Create a timestamped log file under the EMR directory and write the run's
# start time into it as its first line.

# Minute-resolution stamp used as the log file's suffix.
TIMESTAMP=$(date "+%Y%m%d%H%M")
EMR_DIR=/opt/emr
LOG_FILE="$EMR_DIR/run_emr_cluster.log.$TIMESTAMP"
# Human-readable start time; it contains a space, so it must stay quoted when expanded.
START=$(date "+%Y-%m-%d %H:%M")
# '>' creates the file, or truncates it if one already exists for this minute.
echo "$START" > "$LOG_FILE"
@griggheo
griggheo / gist:1340282
Created November 4, 2011 19:38
elastic-mapreduce --describe output
{
"JobFlows": [
{
"LogUri": "s3n:\/\/somebucket.yourcompany.com\/logs\/",
"Name": "test1",
"BootstrapActions": [],
"SupportedProducts": [],
"ExecutionStatusDetail": {
"EndDateTime": null,
"CreationDateTime": 1320167627.0,
import os, sys, time
from socket import gethostname
from optparse import OptionParser
from grizzled.os import daemonize
# Filesystem locations used by this script.
LOGDIR = "/opt/tornado/logs"
ROTATELOGS_CMD = "/usr/sbin/rotatelogs"

# Name of the Python executable, and the system-wide path built from it.
PYTHON_BINARY = "python"
PATH_TO_PYTHON_BINARY = "/usr/bin/{0}".format(PYTHON_BINARY)
# The virtualenv interpreter replaces the system path assigned just above,
# so this is the value the rest of the script sees.
PATH_TO_PYTHON_BINARY = "/opt/tornado/myvenv/bin/python"
#!/usr/bin/env ruby
require 'rubygems'
require 'chef'
require 'chef/client'
require 'chef/run_context'
# Set the :solo key in Chef's configuration to true.
# NOTE(review): presumably this selects chef-solo behaviour (no Chef server) - confirm against the Chef version in use.
Chef::Config[:solo] = true
# Record :info as the configured log level.
Chef::Config[:log_level] = :info
# Pass :info to Chef::Log.level as well.
# NOTE(review): this uses the call form rather than an assignment (level = ...);
# confirm this form actually sets the level on the Chef/mixlib-log version in use.
Chef::Log.level(:info)
import logging
import os
import tornado.escape
import tornado.httpclient
import tornado.httpserver
import tornado.ioloop
import tornado.options
import tornado.web
#!/usr/bin/env python
from libcloud.types import Provider
from libcloud.providers import get_driver
import os  # imported here because this snippet does not otherwise import os

# EC2 credentials: taken from the standard AWS environment variables when they
# are set, so real keys never have to be written into this file. The original
# placeholder strings are kept as fallbacks, so behaviour is unchanged when the
# variables are absent.
EC2_ACCESS_ID = os.environ.get('AWS_ACCESS_KEY_ID', 'xxxxxxx')
EC2_SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', 'yyyyyyyyyyyyyyyyyyyyyyyyyy')

# Look up the libcloud driver class for EC2 and instantiate it with the credentials.
EC2Driver = get_driver(Provider.EC2)
conn = EC2Driver(EC2_ACCESS_ID, EC2_SECRET_KEY)
#!/usr/bin/env python
import os
from libcloud.types import Provider
from libcloud.providers import get_driver
from libcloud.base import NodeImage, NodeSize
# EC2 credentials: taken from the standard AWS environment variables when they
# are set, so real keys never have to be written into this file. The original
# placeholder strings are kept as fallbacks, so behaviour is unchanged when the
# variables are absent.
EC2_ACCESS_ID = os.environ.get('AWS_ACCESS_KEY_ID', 'xxxxxxxxxxxxxxxxxxxxx')
EC2_SECRET_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', 'yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy')