Skip to content

Instantly share code, notes, and snippets.

@t3rmin4t0r
t3rmin4t0r / ats-extract.py
Created December 7, 2018 01:00
ats-extract script - unpack a hive_query ATS entity
import json
import sys
class ATSFile(object):
def __init__(self, name):
self.data = json.load(open(name))
self.name = name
def dump(self):
print self.data.keys()
info = self.data["otherinfo"]
@t3rmin4t0r
t3rmin4t0r / README.md
Last active August 15, 2018 18:37
Paper review scripts

These scripts are used to review papers automatically.

@t3rmin4t0r
t3rmin4t0r / semijoin-cycle.dot
Last active August 1, 2018 20:31
graphviz semijoin task loop without operator loop
# dot -Tsvg -o semijoin-cycle.svg semijoin-cycle.dot
digraph {
rankdir=TB;
subgraph cluster_map2 {
label="Map 2";
TS_2 -> JOIN_1 -> JOIN_2 -> FS_1;
JOIN_1 -> RS_4;
FS_1 -> RS_4 [label="artificial", style="dashed", color="red", constraint=false];
}
subgraph cluster_map1 {
@t3rmin4t0r
t3rmin4t0r / jstacker.py
Created July 20, 2018 21:02
Jstack collapsing script
import sys, re, os, math, os.path
from collections import defaultdict
class JStackTrie(object):
def __init__(self):
self.roots = defaultdict(JStackTrie)
self.count = 0
self.name = "root"
@t3rmin4t0r
t3rmin4t0r / passaggi1718-clustered.sql
Last active July 13, 2018 18:17
code for Cannot INSERT OVERWRITE on clustered table with > 8 buckets
CREATE TABLE IF NOT EXISTS passaggi1718
(
PASSAGGIO int
,DISTINCTTESSERA string
,DISTINCTTESSERAXGG string
,CODICETESSERA string
,PRIMOINGRESSOAX decimal(38,10)
,PRIMOINGRESSOCALC decimal(38,10)
,PRODLORDO decimal(20,6)
,PUNTI decimal(38,10)
@t3rmin4t0r
t3rmin4t0r / query4-swapped.sql
Last active March 5, 2018 20:15
Query4 with customer_id UNIQUE constraint & c_customer_sk PRIMARY constraint
-- joins on customer_id == joins on c_customer_sk
with
sec_year_total as (
select c_customer_sk customer_id
,c_preferred_cust_flag customer_preferred_cust_flag
,d_year dyear
,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total
,'s' sale_type
from customer
,store_sales
@t3rmin4t0r
t3rmin4t0r / coin-knapsack.jl
Last active August 18, 2018 07:30
Julia JuMP solver for the coin knapsack problem
# coin problem: Given an array of coin denominations,
# write a function that returns the minimum number of
# coins that make up a given amount N. There is always
# a coin of value 1 in the coin denominations, so you
# don't have to worry about amounts that cannot possibly
# be formed. 
# Example:
# 23, {1,7,9} should return 3 (two 7s + 9);
# 21, {1,7,9} should return 3  (three 7s);
@t3rmin4t0r
t3rmin4t0r / run-tez-analyzers.sh
Last active November 30, 2017 08:29
Run all Tez analysers in one pass
# Directory containing this script; the analyzer jars are looked up relative to it.
CWD=$(dirname "$0")
# First argument: the DAG zip file.
DAGZ="$1"
# DAG id = zip file name without its directory and .zip suffix.
DAGID=$(basename "$1" .zip)
# Hadoop classpath, or empty when hadoop is not on PATH.
# The lookup's own output is discarded so only the classpath is captured
# (a bare `which hadoop` would prepend the binary's path to the value).
HADOOP_CP=$(command -v hadoop >/dev/null 2>&1 && hadoop classpath)
# Run the Tez AnalyzerDriver on a classpath made of $HADOOP_CP plus the jars
# in $CWD and $CWD/lib. All arguments are forwarded to AnalyzerDriver.
function RUNJAR {
    # The classpath is quoted so it reaches java as a single argument and the
    # dir/* wildcard entries are left for the JVM to expand rather than the shell.
    # "$@" forwards each argument intact, even when it contains whitespace.
    java -cp "$HADOOP_CP:$CWD/*:$CWD/lib/*:" org.apache.tez.analyzer.plugins.AnalyzerDriver "$@"
}
@t3rmin4t0r
t3rmin4t0r / log4j2-default.xml
Created September 14, 2017 04:16
Default log4j2 file for missing StatusLogger errors
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Appenders>
<RollingFile name="RollingFile" filename="log/test.log"
filepattern="${logPath}/test.log">
<PatternLayout pattern="%msg%n" />
<Policies>
<SizeBasedTriggeringPolicy size="100 MB" />
</Policies>
<DefaultRolloverStrategy max="20" />
@t3rmin4t0r
t3rmin4t0r / ParseHeapDump.md
Created September 7, 2017 15:56
Eclipse MAT in reporting mode

Download Eclipse MAT

Edit MemoryAnalyzer.ini and change the -vmargs section to use -Xmx4G

Run

./ParseHeapDump.sh file.hprof org.eclipse.mat.api:suspects org.eclipse.mat.api:overview org.eclipse.mat.api:top_components