Skip to content

Instantly share code, notes, and snippets.

# Print hosts-file style lines ("<ip> <alias> <alias> ...") for the containers
# listed by `docker ps -q`, taking the address and aliases each container has
# on the docker network named "hadoop".
docker ps -q \
  | xargs docker inspect \
      --format '{{ .NetworkSettings.Networks.hadoop.IPAddress }} {{ range $index,$element := .NetworkSettings.Networks.hadoop.Aliases }}{{$element}} {{end}}'
#######################################
# Print Ansible inventory lines for the hosts-file entries matching a pattern.
# Every matching line "<ip> <name> ..." is turned into
#   node<N> ansible_host=<name> ansible_ssh_host=<ip> ansible_ssh_user=root
# where N numbers the matching lines starting at 1.
# Arguments:
#   $1 - grep pattern selecting the hosts-file lines; required
#   $2 - hosts file to read; optional, defaults to /etc/hosts
# Outputs:
#   inventory lines on stdout; a usage message on stderr when $1 is missing
# Returns:
#   2 when the pattern is missing, otherwise the status of the final awk stage
#######################################
hosts_to_inventory() {
  if [[ -z "${1:-}" ]]; then
    printf 'Usage: hosts_to_inventory <pattern> [hosts_file]\n' >&2
    return 2
  fi
  local pattern=$1
  local hosts_file=${2:-/etc/hosts}
  # grep runs first so that awk's NR counts only the matching lines.
  grep -- "$pattern" "$hosts_file" \
    | awk '{print "node" NR, "ansible_host=" $2, "ansible_ssh_host=" $1, "ansible_ssh_user=root"}'
}
#######################################
# Append hashed SSH host keys for matching hosts-file entries to
# ~/.ssh/known_hosts.
# The first three fields of every matching line are each passed, one at a
# time, to `ssh-keyscan -H`; lines with fewer fields produce blank lines,
# which xargs skips.
# Arguments:
#   $1 - grep pattern selecting the hosts-file lines; required
#   $2 - hosts file to read; optional, defaults to /etc/hosts
# Outputs:
#   appends the ssh-keyscan output to ~/.ssh/known_hosts; a usage message on
#   stderr when $1 is missing
# Returns:
#   2 when the pattern is missing, otherwise the status of the final xargs stage
#######################################
known_hosts() {
  if [[ -z "${1:-}" ]]; then
    printf 'Usage: known_hosts <pattern> [hosts_file]\n' >&2
    return 2
  fi
  local pattern=$1
  local hosts_file=${2:-/etc/hosts}
  grep -- "$pattern" "$hosts_file" \
    | awk '{print $1 "\n" $2 "\n" $3}' \
    | xargs -n1 ssh-keyscan -H >> ~/.ssh/known_hosts
}

Put the files in the root of the Hadoop directory and, after a -Pdist build, just run docker-compose up -d

For scaling up: docker-compose scale datanode=3

#!/usr/bin/env bash
# Report the size of the current directory tree, then delete everything
# beneath it that is named "jdiff", "*sources*.jar" or "aws-java-sdk-bundle*".
echo "Size before:"
du -sh
# An explicit "." start path keeps this portable to non-GNU find.
# -prune stops find from descending into a directory that is about to be
# removed. "-exec ... {} +" hands the names straight to rm, so paths that
# contain whitespace are safe and rm is not run at all when nothing matches.
find . -name jdiff -prune -exec rm -rf -- {} +
# -f keeps rm from prompting now that its stdin is no longer a pipe.
find . -name "*sources*.jar" -exec rm -f -- {} +
find . -name "aws-java-sdk-bundle*" -exec rm -f -- {} +
#Replace duplicate with symbolic links
@elek
elek / Dockerfile
Last active September 10, 2018 19:28
FROM flokkr/hadoop-runner:latest
# --chown is supported only from Docker 17.09 (but kubernetes supports 17.03).
# COPY is used rather than ADD because only a plain local copy is needed;
# this assumes target/ozone-0.2.1-SNAPSHOT is a directory, not an archive.
COPY --chown=hadoop:flokkr target/ozone-0.2.1-SNAPSHOT /opt/hadoop
WORKDIR /opt/hadoop
# ServiceAccount named "tiller" in the kube-system namespace.
apiVersion: v1
kind: ServiceAccount
metadata:
  # name and namespace are fields of metadata and must be nested under it.
  name: tiller
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: tiller
# Trace calls to callBlockingMethod on com.google.protobuf.BlockingService.
# The condition is the constant true, so both actions run on every trigger:
#   1. print the simple class name of the third method argument ($3) together
#      with the value returned by linked(Thread.currentThread(), "source");
#   2. print $3.toString().
# NOTE(review): $3 is presumably the protobuf request message, and the
# "source" link is presumably populated by another rule - confirm against the
# rest of the script.
# NOTE(review): the leading ^ on the interface name presumably extends the
# rule to overriding implementations - confirm against the Byteman guide.
RULE Hadoop RPC request
INTERFACE ^com.google.protobuf.BlockingService
METHOD callBlockingMethod
IF true
DO traceln("--> RPC message request: " + $3.getClass().getSimpleName() + " from " + linked(Thread.currentThread(), "source"));
traceln($3.toString())
ENDRULE
RULE Hadoop RPC response
@elek
elek / ozonepa.sh
Last active August 22, 2018 16:27
#!/usr/bin/env bash
# JQL filter: HDDS issues whose status is 'Patch Available', ordered by most
# recent update first.
QUERY="project = HDDS AND status = 'Patch Available' order by updated desc"
# Column header row.
printf '%-9s [%-14s] %-70s (%-14s) %-30s\n' \
  KEY ASSIGNEE SUMMARY LASTCOMMENT REVIEWERS
for ONE in $(curl -G -s -X GET -H "Content-Type: application/json" "https://issues.apache.org/jira/rest/api/2/search?fields=summary,key,status,components,assignee,comment" --data-urlencode "jql=$QUERY" | jq -r '.issues[] | @base64'); do
# Decode the base64-encoded JSON held in ${ONE} and apply a jq filter to it.
# Globals:   ONE (read) - base64-encoded JSON document
# Arguments: $1 - jq filter expression
# Outputs:   the raw (-r) jq result on stdout
_jq() {
  # Both expansions are quoted: filters such as '.comments[-1]' contain glob
  # characters and may contain spaces, so an unquoted ${1} could be
  # pathname-expanded or word-split before jq ever sees it.
  printf '%s\n' "${ONE}" | base64 --decode | jq -r "${1}"
}
LAST_COMMENTER=$(_jq '.fields.comment.comments[-1].author.name')
ASSIGNEE=$(_jq '.fields.assignee.name')
ALL_COMMENTER=$(_jq '.fields.comment.comments[]|.author.name')
@elek
elek / p1.1h.sh
Created September 15, 2018 10:27
#!/bin/bash
# Print the number of HDFS issues that are 'Patch Available' and match
# "updated < -14d", followed by one line per issue linking to it.
# NOTE(review): the "---" separators and the "| href=" suffix look like the
# BitBar/xbar plugin output format - confirm.
export TMPDIR=/tmp
QUERY="project = HDFS AND status = 'Patch Available' and updated < -14d order by updated desc"
TMPFILE=$(mktemp) || exit 1
# Remove the temp file on every exit path, not only after a clean run.
trap 'rm -f -- "$TMPFILE"' EXIT
curl -G -s -X GET -H "Content-Type: application/json" \
  "https://issues.apache.org/jira/rest/api/2/search?fields=summary,key,status,components" \
  --data-urlencode "jql=$QUERY" > "$TMPFILE"
# jq reads the file directly; no cat needed.
echo "PA $(jq '.issues | length' "$TMPFILE")"
echo "---"
jq -r '.issues[] as $issues | $issues.key + " " + $issues.fields.summary + " | href=https://issues.apache.org/jira/browse/" + $issues.key' "$TMPFILE"
# Still removed at the original point so the file does not linger while the
# rest of the script runs.
rm -f -- "$TMPFILE"
echo "---"
@elek
elek / commit_jira.sh
Created March 6, 2019 10:25
Simple shell script to commit an Apache JIRA issue with the right commit message and author information
#!/usr/bin/env bash
# $1: JIRA issue id.
# $2: optional; falls back to FETCH_HEAD when unset or empty.
JIRA_ISSUE=$1
COMMIT_TO_GET_AUTHOR="${2:-FETCH_HEAD}"
if [ ! "JIRA_ISSUE" ] || [ ! $COMMIT_TO_GET_AUTHOR ]; then
cat << EOF
Usage: jcommit <JIRA_ID> <BRANCH_REF>"