Skip to content

Instantly share code, notes, and snippets.

@everdark
Last active May 17, 2020 04:10
Show Gist options
  • Save everdark/6879938 to your computer and use it in GitHub Desktop.
Bash shell automation cheat sheet
# save as ~/.screenrc
# GNU screen configuration: disable the splash screen and set up a caption
# bar plus a bottom status line for multi-window sessions.
startup_message off
# Caption shown on every window: the window list, with the current window
# number/title highlighted.
# NOTE(review): the %{...} sequences are screen string escapes (colors and
# attributes) — verify against screen(1) "STRING ESCAPES" before editing.
caption always "%{=u .r} %-w%<%{=ub .Y}%n %t%{=u .r}%+w "
hardstatus alwaysignore
# Bottom hardstatus line: load average [%l], $USER@hostname, and date/time.
hardstatus alwayslastline "%{= .K} [%l]%<%=%{= .Y}$USER%{= .R}@%H %=%{= .m} %Y/%m/%d%{= .M} %0c "
# Treat window contents as UTF-8.
defutf8 on
# Alternative caption/hardstatus styles, kept disabled:
#caption always "%{= wk} %{= KY} [%n]%t @ %H %{-} %= %{= KR} %l %{-} | %{= KG} %Y-%m-%d %{-} "
#hardstatus alwayslastline " %-Lw%{= Bw}%n%f %t%{-}%+Lw %=| %0c:%s "
# Per-window scrollback buffer, in lines.
defscrollback 20480
#!/bin/sh
# Kyle Chung<alienatio@pixnet.net>
# Description:
# Loop backward over the last 7 days, feeding each date to an executable
# (a MapReduce job via Pig here), and echo the elapsed time in minutes for
# each iteration. The merged output is copied to local and renamed after
# date string format %Y%m%d. Notice that HDFS files are not rewritable, so
# the file on the cluster is dropped at the start of each iteration.
set -u  # abort on unset variables; -e deliberately omitted so one failed day does not stop the loop

for i in $(seq 1 7); do  # POSIX sh: {1..7} brace ranges are a bashism
  # Two renderings of the same day: slashed (Pig parameter) and compact
  # (local output file name) — replaces the old echo|sed pipeline.
  dtime=$(date -d "$i days ago" +%Y/%m/%d)
  fname=$(date -d "$i days ago" +%Y%m%d)
  # Drop the previous iteration's HDFS output before rerunning the job.
  hadoop fs -rmr -skipTrash tmp_data
  tm_start=$(date +%s)
  pig -p date="$dtime" -p out=tmp_data -f get_data.pig
  tm_end=$(date +%s)
  tm_cost=$(( (tm_end - tm_start) / 60 ))
  echo "time cost: $tm_cost mins"
  hadoop fs -getmerge tmp_data "data/$fname"
done
#!/usr/bin/python
"""Print the hadoop getmerge commands covering the week before a report date.

Builds the list of dates in [report_date - date_interval, report_date),
formatted %Y/%m/%d, and prints one ``hadoop fs -getmerge`` command per day.
"""
from datetime import datetime, timedelta
from os import system  # imported so the printed commands can be executed; currently unused

date_interval = 7                     # number of days to cover
report_date = datetime(2014, 2, 12)   # exclusive upper bound of the range
start_date = report_date - timedelta(days=date_interval)

# One formatted date per day in [start_date, report_date).
covering_dates = [
    (start_date + timedelta(days=offset)).strftime('%Y/%m/%d')
    for offset in range(date_interval)
]

for index, d in enumerate(covering_dates):
    # Commands are only printed; wrap the string in system(...) to execute it.
    print('hadoop fs -getmerge /Application/ews/%s/* test%d' % (d, index))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment