You can now find this article on my blog: http://wp.me/p6W-6p
#!/bin/env python | |
import sys | |
import os | |
import json | |
def clean_lines(lines): | |
clean_lines=[] | |
# TODO: a map/reduce or a comprehension might be better | |
for line in lines: |
# encoding: UTF-8 | |
require 'optparse' | |
require 'net/http' | |
require 'json' | |
def parse_options(argv) | |
opts = {} | |
@parser = OptionParser.new do |o| |
#!/bin/bash | |
set -e | |
# overridable defaults | |
GEM_HOME=${GEM_HOME:-/usr/local/passenger} | |
PASSENGER_BIN_PATH=${PASSENGER_BIN_PATH:-$GEM_HOME/bin} | |
PASSENGER_TMPDIR=${PASSENGER_TMPDIR:-/var/tmp} | |
PATH=$PATH:$PASSENGER_BIN_PATH |
>> coll.insert({:date => Date.today}) | |
BSON::InvalidDocument: Date is not currently supported; use a UTC Time instance instead. | |
>> coll.insert({:date => DateTime.now}) | |
BSON::InvalidDocument: DateTime is not currently supported; use a UTC Time instance instead. | |
>> coll.insert({:date => Time.now}) #=> BSON::ObjectId('4dd39768b98f703261000003') |
{ | |
"mappings": { | |
"tweet": { | |
"properties": { | |
"created_at": { | |
"format": "YYYY-MM-dd HH:mm:ss Z", | |
"type": "date" | |
}, | |
"entities": { | |
"properties": { |
check process resque_api_0 | |
with pidfile /home/deploy/apps/api/current/tmp/pids/resque_worker_0.pid | |
start program = "/bin/sh -c 'cd /home/deploy/apps/api/current; GEM_HOME=/home/deploy/.gem/ruby/1.8 GEM_PATH=/home/deploy/.gem/ruby/1.8 PATH=$PATH:/home/deploy/.gem/ruby/1.8/bin:./bin nohup bundle exec rake environment resque:work RAILS_ENV=production QUEUE=* PIDFILE=tmp/pids/resque_worker_0.pid INTERVAL=2 >> log/resque_worker_0.log'" as uid deploy and gid deploy | |
stop program = "/bin/sh -c 'cd /home/deploy/apps/api/current && kill -s QUIT `cat tmp/pids/resque_worker_0.pid` && rm -f tmp/pids/resque_worker_0.pid; exit 0;'" | |
if totalmem is greater than 350 MB for 10 cycles then restart # eating up memory? | |
GROUP resque_api |
#!/bin/sh | |
set -e | |
if [ ! "`id -u`" -eq 0 ]; then | |
echo "Must be root !" | |
exit 0 | |
fi | |
MYSQLUSER=root |
Let's say you have a model with a file attached using Paperclip. You have a couple of million of those files, and you're not sure that every one of them (and all its thumbnails) is still used by a database record.
You could use this rake task to recursively scan all the directories and check if the files need to be kept or destroyed.
In this example, the model is called Picture, the attachment is image, and the path is partitioned like images/001/412/497/actual_file.jpg
The task walks down the directory tree. Each time a path ends with three triplets of digits ("001/412/497", for example), it looks for a record with the ID 1412497. If no such record exists, the whole directory is moved to a parallel images_deleted directory. At the end, you can delete the files if you like, or move them to an archive location.
You can use the "dry run" mode to print which files would be removed.
#!/bin/bash | |
set -e | |
### BEGIN INIT INFO | |
# Provides: unicorn | |
# Required-Start: $all | |
# Required-Stop: $network $local_fs $syslog | |
# Default-Start: 2 3 4 5 | |
# Default-Stop: 0 1 6 |