Skip to content

Instantly share code, notes, and snippets.

@arelra
arelra / xargs
Created April 6, 2015 00:44
xargs multiple arg
# Run ./lister once per line of keylist.txt, up to four at a time, writing
# each key's output to <key>.txt in the current directory.
# The key is handed to the inner shell as $1 instead of being spliced into
# the `sh -c` string, so shell metacharacters in a key are never re-parsed
# as shell code. `-n 1` is dropped because -I already consumes one input
# line per invocation (GNU xargs treats the two options as mutually
# exclusive), and the input file is redirected rather than piped from cat.
xargs -P 4 -I{} sh -c './lister "keys/$1" > "$1.txt"' _ {} < keylist.txt
@arelra
arelra / gist:16549ff7815fed125177
Last active August 29, 2015 14:18
parallel cp
# Copy every path listed in the x*.txt list files (one path per line) into
# /data: 16 cp jobs at a time, up to 1000 paths per cp invocation, with the
# whole pipeline running in the background.
# The list-file names are passed NUL-terminated (-printf '%f\0' / xargs -0)
# so a list file whose name contains whitespace or quotes still reaches cat
# as a single argument.
find . -maxdepth 1 -type f -name 'x*.txt' -printf '%f\0' | xargs -0 cat | parallel -j16 -N 1000 -I{} cp {} --target-directory=/data &
@arelra
arelra / gist:3a4d05ead97636146065
Created April 7, 2015 22:53
Count lines grouped by line length (command line)
# Histogram of line lengths in cut.txt: awk emits the length of each line,
# sort orders the lengths numerically, and uniq -c prefixes each distinct
# length with the number of lines that have it.
awk '{ print length($0) }' cut.txt | sort -g | uniq -c
@arelra
arelra / ec2volume
Last active August 29, 2015 14:18
attach / detach volume to ec2 instance
attach
------
aws ec2 attach-volume --volume-id vol-123 --instance-id i-123 --device /dev/sdf --region us-east-1
lsblk (list block devices; on Linux the attached device is renamed to xvd*, e.g. /dev/sdf appears as /dev/xvdf)
sudo file -s /dev/xvdf (if reports only 'data' then no filesystem)
sudo mkfs -t ext4 /dev/xvdf (ONLY IF NO FILESYSTEM ie NEW VOLUME)
sudo mkdir /data (create mount-point e.g. data)
sudo mount /dev/xvdf /data (mount device at /data)
sudo chmod xxx /data (to add permissions)
optional: to mount the volume automatically on instance restart, add an entry for the device (e.g. /dev/xvdf at /data) to /etc/fstab
@arelra
arelra / installAWSCli
Last active August 29, 2015 14:18
install aws cli
# Install pip through apt (package name: python-pip).
sudo apt-get install python-pip
# Use pip, run as root, to install the awscli package.
sudo pip install awscli
# Run the AWS CLI's `configure` subcommand to set it up.
aws configure
@arelra
arelra / gzip-parallel
Created April 14, 2015 01:37
gzip in parallel
# Create one gzip-compressed tarball per entry of the current directory,
# written to /data/keys/<entry>.tar.gz, running 16 tar jobs at a time in the
# background.
# Entry names come NUL-terminated from find rather than from parsed `ls`
# output, so names containing newlines, quotes or other odd characters are
# passed through intact. Names starting with '.' are excluded to match what
# a plain `ls` would have listed.
find . -mindepth 1 -maxdepth 1 ! -name '.*' -printf '%f\0' | parallel -0 -j16 -I{} tar -czf /data/keys/{}.tar.gz {} &
@arelra
arelra / findbymins
Created April 14, 2015 11:47
find files older than xmins and sync
# Upload every regular file under the current directory that was last
# modified more than 60 minutes ago.
# `aws s3 sync` operates on directories / S3 prefixes, not on individual
# files, so each file found is uploaded with `aws s3 cp` instead.
# Paths are passed NUL-terminated so file names with whitespace or quotes
# survive xargs unchanged.
# NOTE: s3://.... is a placeholder for the real destination; ending it with
# a trailing slash should make cp keep each file's own name — confirm
# against the AWS CLI docs for your layout.
find . -type f -mmin +60 -print0 | xargs -0 -I {} aws s3 cp {} s3://....
@arelra
arelra / diffsoutput
Created April 15, 2015 22:09
Diff output diffs only
# Compare file1 and file2 and, for each line of diff output that has a
# second whitespace-separated field, print just that field. Lines with fewer
# than two fields (such as the "---" separators) are dropped.
diff file1 file2 | awk '$2 != "" { print $2 }'
@arelra
arelra / gist:9e9a5387b9cb33cdecb3
Last active September 21, 2015 22:40
s3-to-xmlstarlet-to-json-to-elasticsearch
#!/bin/bash
while read -r line
do
printf "attempting: $line > "
tmpfile=./tmp/$line.xml
aws s3 cp s3://*****/$line $tmpfile > /dev/null
if grep -q "<???????:para>" $tmpfile; then
printf "ABSTRACT: Y > "
xmlstarlet sel -t -o "{ @@@@id@@@@ : @@@@" -v "//?????:eid" -o "@@@@, @@@@abstract@@@@ : @@@@" -t -v "//abstract/?????" -o "@@@@}" $tmpfile > $tmpfile.json
sed -i -e 's/"/\\"/g' $tmpfile.json
# Tally the `aws s3 ls` listing: count the lines and sum their third column,
# then print both totals once all input has been read.
aws s3 ls s3:// | awk '
  { n++; bytes += $3 }
  END { print "Count:", n, "Size:", bytes, "bytes" }
'