Skip to content

Instantly share code, notes, and snippets.

@t3rmin4t0r
t3rmin4t0r / orc-stripe-verify.py
Created July 2, 2014 17:37
ORC stripe verifier
import sys
import re
# Matches one "Stripe:" line and captures its offset, data length and row
# count. `+` rather than `*` so a line with a missing number is skipped
# instead of reaching int('') and raising ValueError.
S_RE = re.compile(r'Stripe: offset: ([0-9]+) data: ([0-9]+) rows: ([0-9]+).*')

# Block size, in bytes, that stripes are checked against (256 MiB).
stripe_size = 256*1024*1024


def find_overflows(lines, block_size=stripe_size):
    """Return the stripes whose start and end offsets fall in different blocks.

    Args:
        lines: iterable of text lines; lines that do not match S_RE are
            ignored.
        block_size: size in bytes of one block (defaults to stripe_size).

    Returns:
        A list of (start, end) tuples, in input order, where
        end = offset + data length.
    """
    overflows = []
    for line in lines:
        match = S_RE.search(line)
        if not match:
            continue
        start, length, _rows = (int(group) for group in match.groups())
        end = start + length
        # Floor division is required: on Python 3 `/` yields floats, which
        # would make almost every stripe compare as "different block".
        # NOTE(review): a stripe ending exactly on a block boundary is still
        # reported, as in the original check - confirm that is intended.
        if start // block_size != end // block_size:
            overflows.append((start, end))
    return overflows


if __name__ == "__main__":
    for start, end in find_overflows(sys.stdin):
        # Single pre-formatted string keeps the output identical on
        # Python 2 and Python 3.
        print("%d overflows %d block" % (end, start))
@t3rmin4t0r
t3rmin4t0r / Centos-Source.repo
Created July 30, 2014 13:29
CentOS SRPMs repository
# Source packages for the base OS tree, served from vault.centos.org.
# Disabled by default (enabled=0).
[base-source]
name=CentOS-$releasever - Base Source
baseurl=http://vault.centos.org/centos/$releasever/os/Source/
enabled=0
# Source packages for the updates tree, served from vault.centos.org.
# Disabled by default (enabled=0).
[updates-source]
name=CentOS-$releasever - Updates Source
baseurl=http://vault.centos.org/centos/$releasever/updates/Source/
enabled=0
@t3rmin4t0r
t3rmin4t0r / transpose.py
Created October 24, 2014 07:27
transpose.py
import sys,re,math,os
import curses
from time import sleep
# Matches the "Status: Running" line and captures the application id.
running = re.compile(
    r'Status: Running \(application id: (application_[0-9_]*)\)'
)

# Matches a "Map N" / "Reducer N" progress entry. Captures the vertex name,
# the number before the slash, an optional parenthesised group, and the
# number after the slash.
tasks = re.compile(
    r'(Map [0-9]*|Reducer [0-9]*): ([\-0-9]*)(\(.[0-9]*\))?/([\-0-9]*)'
)

# Debug output goes to a file named "log" in the current directory; the file
# is truncated when this module is loaded.
log = open("log", "w")


def debug(a):
    """Write str(a) plus a newline to the log file and flush it at once.

    Returns the pair (result of write, result of flush).
    """
    written = log.write(str(a) + "\n")
    flushed = log.flush()
    return (written, flushed)
@t3rmin4t0r
t3rmin4t0r / ddl.py
Created November 7, 2014 00:50
Hive Serialization.ddl to Create Table helper
import sys
for l in sys.stdin:
l = l.strip()
name = l[0:l.find("{")]
cols = [x.strip().split(" ") for x in l[l.find("{")+1:l.find("}")].split(",")]
print "create table ", name, "(",
print ",".join(["%s %s" % (b,a) for (a,b) in cols]), ");"
[gopal@cn041-10 comcast]$
@t3rmin4t0r
t3rmin4t0r / reducer-counters.py
Last active August 29, 2015 14:16
AM history parser for reducer skew checks
import sys
import re
def Counter(name):
pattern = re.compile("%s=([^,]*)" % name)
# warning closure
def get(l):
m = pattern.search(l)
if m:
return m.group(1)
@t3rmin4t0r
t3rmin4t0r / jstacker.py
Created June 25, 2015 19:43
JStack Trie Maker
import sys, re, os, math, os.path
from collections import defaultdict
class JStackTrie(object):
def __init__(self):
self.roots = defaultdict(JStackTrie)
self.count = 0
self.name = "root"
@t3rmin4t0r
t3rmin4t0r / namenode-appminer.py
Created July 27, 2015 23:52
Extract list of files created per-application for the HDFS NN (from logs)
import re
import sys, math, os.path
from glob import glob
from itertools import groupby,chain
from collections import defaultdict
import re
def parse(f):
PAT = re.compile(r'DIR\* completeFile: ([^ ]*) is closed by ([^ ]*)')
@t3rmin4t0r
t3rmin4t0r / join-test.py
Last active August 29, 2015 14:27
join query generator for testing
import itertools
from random import shuffle
def permutations(l):
    """Return every ordered arrangement of every length drawn from *l*.

    Args:
        l: the items to arrange; any iterable is accepted.

    Returns:
        A list of tuples: first the single empty tuple, then all
        arrangements of length 1, then length 2, and so on up to the full
        length of *l*. Within each length the order is that produced by
        itertools.permutations.
    """
    items = list(l)
    # range() instead of xrange(): xrange does not exist on Python 3, and
    # range behaves the same here on Python 2.
    sizes = range(len(items) + 1)
    return list(
        itertools.chain.from_iterable(
            itertools.permutations(items, size) for size in sizes
        )
    )
int main()
{
__asm__(";foo");
char a[1024] = {0,};
__asm__(";end");
/*
# 4 "x.c" 1
;foo
# 0 "" 2
leal 12(%esp), %ebx
@t3rmin4t0r
t3rmin4t0r / zend-open-files.txt
Created October 30, 2010 19:15
Zend CG(open_files)
Culprit - zend_op_array *compile_filename(int type, zval *filename TSRMLS_DC)
====================PHP 5.1====================
ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle TSRMLS_DC)
{
zend_llist_del_element(&CG(open_files), file_handle, (int (*)(void *, void *)) zend_compare_file_handles);
}
...