Last active
June 30, 2016 05:19
-
-
Save ehlyzov/10ebab2ddc4b10732701683143a0d52e to your computer and use it in GitHub Desktop.
Bulk copy script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
var Queue = require('bull'), | |
path = require('path'), | |
fs = require('fs'), | |
_ = require('lodash') | |
// Usage guard: the directory to scan is the only (required) argument.
if (process.argv.length <= 2) {
  console.log("Usage: " + __filename + " path/to/directory");
  process.exit(1);
}

var sourceDir = path.resolve(process.argv[2]);
var queue = Queue('files', 6379, '127.0.0.1');

// Enqueue one job per "*.txt" listing found directly inside sourceDir.
// Each job carries the listing path (contentFile) and the path of the
// matching archive (file), which is the listing name plus ".tar.gz".
fs.readdir(sourceDir, function(err, items) {
  if (err) {
    console.error("Cannot read " + sourceDir + ": " + err.message);
    process.exitCode = 1;
    queue.close();
    return;
  }

  var pending = items
    .filter(function(item) {
      return /\.txt$/.test(item);
    })
    .map(function(item) {
      // Build both paths from the full listing name so that names with
      // extra dots (e.g. "a.b.txt") keep every part of their name.
      var contentFile = path.join(sourceDir, item);
      return queue.add({ file: contentFile + '.tar.gz', contentFile: contentFile });
    });

  // Close the queue only after every add() has settled; closing right after
  // scheduling readdir would shut the connection before any job is stored.
  Promise.all(pending).then(function() {
    return queue.close();
  }, function(addErr) {
    console.error("Failed to enqueue jobs: " + addErr.message);
    process.exitCode = 1;
    return queue.close();
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Prepares a huge flat directory for upload to a SWIFT endpoint:
# the *.txt files are listed in batches and each batch is packed into a tar.gz archive.
#
set -o errexit # exit on error
#set -o nounset # exit on use of an unset variable
#set -o xtrace # enable debug tracing
set -o pipefail # a pipeline fails if any command in it fails
# Print command-line help to stdout.
# The documented defaults must match the JOBS/BATCH values set in the
# "Defaults" section of this script (BATCH=2000, JOBS=1).
usage() {
    cat <<EOF
Usage:
    split.sh [-b BATCH_SIZE] [-j NUMBER_OF_JOBS] <source_dir> <target_dir>
Options:
    -b BATCH_SIZE       maximum files in one archive (default: 2000)
    -j NUMBER_OF_JOBS   how many tar processes will be run simultaneously (default: 1)
Params:
    <source_dir>        source directory
    <target_dir>        directory where archive files will be created
EOF
}
# Defaults
# In case we need some parallelism in future
JOBS=1
# Set archive size (maximum number of files per archive)
BATCH=2000
# Intended for configuring xargs.
# NOTE(review): not referenced anywhere else in the visible script.
ARG_MAX=$(( $(getconf ARG_MAX) - 4096 ))

# Parse -b / -j; anything else prints the help and aborts.
while getopts "b:j:" Option; do
    case $Option in
        b ) BATCH="$OPTARG";;
        j ) JOBS="$OPTARG";;
        * ) echo "[!] Invalid option" && usage && exit 1;;
    esac
done
shift "$((OPTIND-1))"

# Reject bad numbers up front: BATCH is used as a modulus by awk later on
# (0 or a non-number breaks the batching), JOBS is handed to `xargs -P`.
if ! [[ "$BATCH" =~ ^[0-9]+$ ]] || (( 10#$BATCH < 1 )); then
    echo "[!] BATCH_SIZE must be a positive integer"
    usage
    exit 1
fi
if ! [[ "$JOBS" =~ ^[0-9]+$ ]]; then
    echo "[!] NUMBER_OF_JOBS must be a non-negative integer"
    usage
    exit 1
fi

# Both positional parameters are required.
if [[ -z "$1" || -z "$2" ]]; then
    usage
    exit 1
fi
# Helper that prints the absolute path of "$1", following symlinks.
#
# The parent directory is resolved physically (pwd -P); if the final path
# component is itself a symlink, its target is resolved recursively.
# The function changes the working directory, so call it inside a command
# substitution / subshell to keep the caller's directory intact.
canonical_readlink() {
    local filename
    # Quote everything so paths containing spaces survive; silence cd because
    # it prints the directory when CDPATH is in effect, which would end up in
    # the captured result.
    cd -- "$(dirname -- "$1")" >/dev/null || return 1
    filename="$(basename -- "$1")"
    if [ -h "$filename" ]; then
        canonical_readlink "$(readlink -- "$filename")"
    else
        echo "$(pwd -P)/$filename"
    fi
}
# Resolve both directories to absolute paths (in subshells, because
# canonical_readlink changes the working directory).
SRC="$(canonical_readlink "$1")"
TARGET="$(canonical_readlink "$2")"

if [ ! -d "${SRC}" ]
then
    echo "Source directory doesn't exist: ${SRC}"
    exit 1
fi
if [ ! -d "${TARGET}" ]
then
    echo "Target directory doesn't exist: ${TARGET}"
    exit 1
fi

echo "SOURCE:  ${SRC}"
echo "TARGET:  ${TARGET}"
echo "BATCH SIZE: ${BATCH}"
echo "JOBS: ${JOBS}"

echo "Make ${BATCH}-sized file listings"
# Write the base names of all *.txt files into TARGET/OUT1.txt, OUT2.txt, ...
# with at most BATCH names per listing.
#  - (FNR - 1) % batch == 0 starts a new listing on lines 1, batch+1, ... and,
#    unlike FNR % batch == 1, also works when batch is 1.
#  - sub() runs on the whole line, so names containing spaces are preserved.
find "${SRC}" -name '*.txt' -print |
    awk -v target="${TARGET}" -v batch="${BATCH}" '
        (FNR - 1) % batch == 0 {
            if (out != "") close(out)
            out = target "/OUT" (++c) ".txt"
        }
        {
            sub(".*/", "")
            print > out
        }
    '
echo "Completed"

echo "Start baking archives"
# One tar per listing, up to JOBS in parallel. -I already runs one command per
# input item; it must not be combined with -n (the two are mutually exclusive).
# NUL-delimited names keep listing paths with spaces or quotes intact.
find "${TARGET}" -name 'OUT*.txt' -print0 |
    xargs -0 -I FILE -P "${JOBS}" tar -czf FILE.tar.gz -C "${SRC}" -T FILE
echo "Completed"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
require('dotenv').config(); | |
var request = require('request'), | |
async = require('async'), | |
fs = require('fs'), | |
path = require('path'), | |
_ = require('lodash'), | |
Queue = require('bull') | |
// Usage guard: both SOURCE_DIR and SWIFT_PATH are required.
if (process.argv.length <= 3) {
  console.log("Usage: " + __filename + " SOURCE_DIR SWIFT_PATH");
  process.exit(1);
}

// Local directory given on the command line, made absolute.
var sourceDir = path.resolve(process.argv[2]);
// SWIFT_PATH is appended to the storage URL, so it is a remote, slash-separated
// path. Normalise it against "/" with POSIX rules instead of path.resolve(),
// which would prepend the local working directory (or a Windows drive) to a
// relative value. Absolute inputs such as "/container/dir" are unchanged.
var targetPath = path.posix.resolve('/', process.argv[3]);
/**
 * Authorize against https://auth.selcdn.ru using the SEL_USER / SEL_PASS
 * environment variables.
 *
 * On a 204 response the callback receives `(null, session)` where `session`
 * holds `authToken` and `storageUrl` taken from the response headers.
 *
 * Failures are fatal by design: a transport error or any other status code
 * is thrown from the request callback instead of being passed to `callback`.
 *
 * @param {function(?Error, {authToken: string, storageUrl: string})} callback
 * @throws {Error} the original request error, or "Auth error: ..." on a
 *   non-204 response.
 */
function authenticate(callback) {
  request({
    url: 'https://auth.selcdn.ru',
    headers: {
      'X-Auth-User': process.env.SEL_USER,
      'X-Auth-Key': process.env.SEL_PASS
    }
  }, function(err, data) {
    if (err) {
      // Rethrow the original error object so its stack and code survive;
      // wrapping it in Error(err) would keep only its string form.
      throw (err instanceof Error ? err : new Error(err));
    }
    if (data.statusCode !== 204) {
      throw new Error('Auth error: unexpected status ' + data.statusCode);
    }
    callback(null, {
      authToken: data.headers['x-auth-token'],
      storageUrl: data.headers['x-storage-url']
    });
  });
}
/**
 * Read an archive from disk and PUT it to the storage URL of the session,
 * under `targetPath`, with the `extract-archive=tar.gz` query parameter.
 *
 * @param {{session: {storageUrl: string, authToken: string}}} env
 * @param {{file: string}} data - `file` is the path of the archive to send.
 * @param {function(?Error, *)} callback - receives the parsed JSON response
 *   body on success, or the read / request / parse error.
 */
function upload(env, data, callback) {
  async.waterfall([
    function(next) {
      fs.readFile(data.file, next);
    },
    function(blob, next) {
      request({
        url: env.session.storageUrl + targetPath + '?extract-archive=tar.gz',
        method: 'PUT',
        headers: {
          'X-Auth-Token': env.session.authToken,
          'Accept': 'application/json'
        },
        body: blob
      }, function(err, resp, body) {
        // Check the error first: on a transport failure there is no response
        // object to log.
        if (err) {
          next(err);
          return;
        }
        console.log(resp.statusCode);
        console.log(resp.headers);
        console.log(resp.body);

        // A non-JSON body must fail this job through the callback instead of
        // throwing out of the request callback.
        var parsed;
        try {
          parsed = JSON.parse(body);
        } catch (parseErr) {
          next(new Error('Unexpected non-JSON response (HTTP ' + resp.statusCode +
            ') while uploading ' + data.file + ': ' + parseErr.message));
          return;
        }
        next(null, parsed);
      });
    }
  ], callback);
}
/**
 * Decide whether an upload succeeded by inspecting the parsed response.
 *
 * The upload counts as successful when a response is present and its
 * `Errors` entry is empty or absent.
 *
 * @param {*} env - not used by this check.
 * @param {{response: *}} data - `response` is the parsed upload response.
 * @param {function(?Error, boolean=)} callback - `(null, true)` on success,
 *   otherwise an Error (its legacy `msg` property mirrors `message`).
 */
function verify(env, data, callback) {
  var response = data ? data.response : undefined;
  var hasResponse = response !== null && response !== undefined;

  if (hasResponse && _.isEmpty(response['Errors'])) {
    callback(null, true);
    return;
  }

  // A missing response (e.g. the upload itself failed) is reported as a
  // verification failure instead of crashing on the property access.
  var failure = new Error("Data wasn't uploaded properly");
  failure.msg = failure.message;
  callback(failure);
}
var queue = Queue('files', 6379, '127.0.0.1');

// Authenticate once, then process queued jobs: upload the archive named in
// each job and verify the response before marking the job as done.
authenticate(function(err, session) {
  if (err) {
    // Without a session no job can be uploaded, so stop here.
    console.error('Authentication failed:', err);
    process.exit(1);
  }
  console.log(session);

  queue.process(function(job, done) {
    // Fail the job and ask for it to be run again.
    // NOTE(review): retry() is called right after done(), as in the original
    // flow; confirm against the bull version in use that the job is already
    // in the failed state at that point.
    function failAndRetry(reason) {
      done(reason);
      job.retry();
    }

    upload({ session: session }, { file: job.data.file }, function(err, data) {
      console.log("Job ID", job.jobId, "file", job.data.file);
      if (err) {
        // Do not verify a response that never arrived.
        console.error("Job", job.jobId, "upload failed:", err.message);
        failAndRetry(err);
        return;
      }
      verify({ session: session }, { file: job.data.file, response: data }, function(err, data) {
        if (err) {
          failAndRetry(Error('error verifying. Rerun job.'));
        } else {
          console.log("Job", job.jobId, "completed");
          done();
        }
      });
    });
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment