Bulk copy script: split a huge flat directory of .txt files into tar.gz archives (split.sh), enqueue the archives in a Bull/Redis queue, and upload them to a SWIFT endpoint with server-side extraction.
'use strict';

// Producer: scan the prepared directory and add one upload job per archive
// to the 'files' queue (consumed by the worker script further down).
var Queue = require('bull'),
    path = require('path'),
    fs = require('fs'),
    _ = require('lodash');

if (process.argv.length <= 2) {
    console.log("Usage: " + __filename + " path/to/directory");
    process.exit(1);
}

var sourceDir = path.resolve(process.argv[2]);
var queue = Queue('files', 6379, '127.0.0.1');

fs.readdir(sourceDir, function(err, items) {
    if (err) {
        throw err;
    }
    var jobs = _
        .chain(items)
        .filter(function(item) {
            return item.match(/\.txt$/);
        })
        .map(function(item) {
            return item.replace(/\..+$/, '');
        })
        .map(function(name) {
            return queue.add({
                file: path.join(sourceDir, name + '.txt.tar.gz'),
                contentFile: path.join(sourceDir, name + '.txt')
            });
        })
        .value();
    // queue.add() returns a promise, so close the connection only after every
    // job has actually been written to Redis (readdir is asynchronous, so the
    // close has to stay inside this callback).
    Promise.all(jobs).then(function() {
        return queue.close();
    });
});
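The producer only fills the queue; it expects a Redis server to already be listening on 127.0.0.1:6379, the address hard-coded above. A hypothetical invocation, with a placeholder script name and directory that are not part of the gist:

node enqueue.js /data/archives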
#!/usr/bin/env bash
# Script for preparing a huge flat directory for upload to a SWIFT endpoint
#
set -o errexit   # exit on error
#set -o nounset  # exit on use of an unset variable
#set -o xtrace   # enable debug tracing
set -o pipefail  # exit on pipe error
usage() {
    cat <<EOF
Usage:
    split.sh [-b BATCH_SIZE] [-j NUMBER_OF_JOBS] <source_dir> <target_dir>
Options:
    -b BATCH_SIZE      maximum number of files in one archive (default: 2000)
    -j NUMBER_OF_JOBS  how many tar processes will run simultaneously (default: 1)
Params:
    <source_dir>       source directory
    <target_dir>       directory where the archive files will be created
EOF
}
# Defaults
# In case we need some parallelism in the future
JOBS=1
# Default archive size (files per archive)
BATCH=2000
# We need this to configure xargs (leave some headroom below the system limit)
ARG_MAX=$((`getconf ARG_MAX` - 4096))
while getopts "b:j:" Option; do
    case $Option in
        b ) BATCH="$OPTARG";;
        j ) JOBS="$OPTARG";;
        * ) echo "[!] Invalid option" && usage && exit 1;;
    esac
done
shift "$((OPTIND-1))"

if [[ -z "$1" || -z "$2" ]]; then
    usage
    exit 1
fi
# helper to get an absolute path
canonical_readlink() {
    local filename
    cd `dirname "$1"`
    filename=`basename "$1"`
    if [ -h "$filename" ]; then
        canonical_readlink `readlink "$filename"`
    else
        echo "`pwd -P`/$filename"
    fi
}
SRC=`canonical_readlink "$1"`
TARGET=`canonical_readlink "$2"`

if [ ! -d "${SRC}" ]; then
    echo "Source directory doesn't exist: ${SRC}"
    exit 1
fi
if [ ! -d "${TARGET}" ]; then
    echo "Target directory doesn't exist: ${TARGET}"
    exit 1
fi

echo "SOURCE:     ${SRC}"
echo "TARGET:     ${TARGET}"
echo "BATCH SIZE: ${BATCH}"
echo "JOBS:       ${JOBS}"
echo "Make ${BATCH}-sized file listings"
find ${SRC} -name '*.txt' -print | awk -v target=${TARGET} -v batch=${BATCH} 'FNR%batch==1{if(FNR==1)c=0; close(out); out=target "/OUT"++c".txt";} {for (i=1; i<=NF; ++i) { sub(".*/", "", $i) } print > out}'
echo "Completed"
echo "Start baking archives"
find ${TARGET} -name 'OUT*.txt' -print | xargs -I FILE -P ${JOBS} -n1 tar -czf FILE.tar.gz -C ${SRC} -T FILE
echo "Completed"
'use strict';

// Consumer: pull archive jobs from the 'files' queue, PUT each .tar.gz to the
// SWIFT endpoint with server-side extraction, and verify the response.
require('dotenv').config();
var request = require('request'),
    async = require('async'),
    fs = require('fs'),
    path = require('path'),
    _ = require('lodash'),
    Queue = require('bull');

if (process.argv.length <= 3) {
    console.log("Usage: " + __filename + " SOURCE_DIR SWIFT_PATH");
    process.exit(1);
}
var sourceDir = path.resolve(process.argv[2]);
// SWIFT_PATH refers to a path on the storage side, so it is used as given
// rather than resolved against the local filesystem.
var targetPath = process.argv[3];
// Authenticate against the storage API and pass the auth token and storage
// URL to the callback.
function authenticate(callback) {
    request({
        url: 'https://auth.selcdn.ru',
        headers: {
            'X-Auth-User': process.env.SEL_USER,
            'X-Auth-Key': process.env.SEL_PASS
        }
    }, function(err, data) {
        if (err) {
            throw Error(err);
        }
        if (data.statusCode == 204) {
            var session = {
                authToken: data.headers['x-auth-token'],
                storageUrl: data.headers['x-storage-url']
            };
            callback(null, session);
        } else {
            throw Error('Auth error');
        }
    });
}
// Read the archive from disk and PUT it to SWIFT with the extract-archive
// option, so that the tarball is unpacked on the server side.
function upload(env, data, callback) {
    async.waterfall([
        function(next) {
            fs.readFile(data.file, next);
        },
        function(blob, next) {
            request({
                url: env.session.storageUrl + targetPath + '?extract-archive=tar.gz',
                method: 'PUT',
                headers: {
                    'X-Auth-Token': env.session.authToken,
                    'Accept': 'application/json'
                },
                body: blob
            }, function(err, resp, body) {
                // On a transport error there is no response object to log.
                if (err) {
                    next(err);
                } else {
                    console.log(resp.statusCode);
                    console.log(resp.headers);
                    console.log(resp.body);
                    next(null, JSON.parse(body));
                }
            });
        }],
        callback
    );
}
// The extract-archive JSON response contains an "Errors" list; treat an empty
// list as a successful upload.
function verify(env, data, callback) {
    if (_.isEmpty(data.response['Errors'])) {
        callback(null, true);
    } else {
        callback({ msg: "Data wasn't uploaded properly" });
    }
}
var queue = Queue('files', 6379, '127.0.0.1');

authenticate(function(err, session) {
    console.log(session);
    queue.process(function(job, done) {
        upload({ session: session }, { file: job.data.file }, function(err, data) {
            console.log("Job ID", job.jobId, "file", job.data.file);
            if (err) {
                // The upload itself failed, so there is no response to verify;
                // fail the job and let it be retried.
                done(err);
                return;
            }
            verify({ session: session }, { file: job.data.file, response: data }, function(err) {
                if (err) {
                    done(Error('error verifying. Rerun job.'));
                    job.retry();
                } else {
                    console.log("Job", job.jobId, "completed");
                    done();
                }
            });
        });
    });
});
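The worker reads its credentials through dotenv; SEL_USER and SEL_PASS are the only variables it looks up. A minimal sketch of the setup, assuming a hypothetical script name, container path, and credentials (none of these values come from the gist):

# .env
SEL_USER=12345_user
SEL_PASS=secret

# run a worker that uploads the queued archives into the given SWIFT path
node upload.js /data/archives /backups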