Created
June 28, 2016 22:50
-
-
Save pypt/e36057e44fb5ec3dda80e1e1eef04c43 to your computer and use it in GitHub Desktop.
Server rebuilds index after crash, uses up all memory, dies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Reproduction steps for a RabbitMQ start-up failure: installs Erlang and
# RabbitMQ, downloads and unpacks a saved broker data directory, starts
# rabbitmq-server against that directory, then follows the broker log.
#
# Requires: curl, sudo, apt (Debian/Ubuntu), and enough disk space for the
# unpacked dump (see the size note next to the download below).

# Abort on the first failing step, on unset variables, and on a failure in
# any stage of a pipeline, so a broken download or repository setup does not
# silently carry on into the later steps.
set -euo pipefail

# Directory holding the unpacked dump; every RABBITMQ_* path and the log that
# is tailed at the end are derived from it so they cannot drift apart.
# NOTE(review): the log was previously tailed from
# "rabbitmq-reindex-oom-crash/logs", which did not match RABBITMQ_LOG_BASE.
# This assumes the tarball unpacks into "mediacloud-rabbitmq-reindex-oom-crash"
# in the current directory -- confirm against the archive contents.
DUMP_DIR="${PWD}/mediacloud-rabbitmq-reindex-oom-crash"
readonly DUMP_DIR

# Raise the open-file limit for this shell and the processes it starts; a
# refusal (e.g. a lower hard limit) is reported but is not fatal.
ulimit -n 65536 || echo "warning: could not raise open-file limit to 65536" >&2

# Register the Erlang Solutions signing key and package repository.
curl -f "http://packages.erlang-solutions.com/ubuntu/erlang_solutions.asc" \
  | sudo apt-key add -
echo "deb http://packages.erlang-solutions.com/ubuntu precise contrib" \
  | sudo tee -a /etc/apt/sources.list.d/erlang-solutions.list

# Register the RabbitMQ package repository via its packagecloud installer.
curl -fsS "https://packagecloud.io/install/repositories/rabbitmq/rabbitmq-server/script.deb.sh" \
  | sudo bash

sudo apt-get -y update
sudo apt-get -y upgrade
sudo apt-get -y install esl-erlang erlang-mode rabbitmq-server

# (optional) Multicore gzip -d
sudo apt-get -y install pigz

# Expands to 89 GB
curl -f -O https://s3.amazonaws.com/mediacloud-rabbitmq-reindex-oom-crash/dump/rabbitmq-reindex-oom-crash.tar.gz
pigz -dc rabbitmq-reindex-oom-crash.tar.gz | tar xvf -

# Please note that it starts on port 5673 (not the default 5672);
# web management is available at port 15673 (not the default 15672)
#
# The server runs in the foreground; its exit status is reported instead of
# aborting the script, so the log is still shown after the server exits.
RABBITMQ_NODE_IP_ADDRESS="127.0.0.1" \
RABBITMQ_NODE_PORT=5673 \
RABBITMQ_NODENAME="mediacloud@localhost" \
RABBITMQ_BASE="${DUMP_DIR}" \
RABBITMQ_CONFIG_FILE="${DUMP_DIR}/rabbitmq" \
RABBITMQ_MNESIA_BASE="${DUMP_DIR}/mnesia" \
RABBITMQ_LOG_BASE="${DUMP_DIR}/logs" \
RABBITMQ_ENABLED_PLUGINS_FILE="${DUMP_DIR}/enabled_plugins" \
/usr/lib/rabbitmq/bin/rabbitmq-server \
  || echo "rabbitmq-server exited with status $?" >&2

# Reached only once the foreground server above has exited.
tail -f "${DUMP_DIR}/logs/mediacloud@localhost.log"
# | |
# Expected: | |
# --------- | |
# RabbitMQ rebuilds whatever index it needs to rebuild after unclean shutdown, | |
# starts listening on ports 5673 for messages, 15673 for web management, all | |
# those millions of messages become available again. | |
# | |
# Actual: | |
# ------- | |
# RabbitMQ starts intensive IO / CPU operations after printing | |
# "msg_store_persistent: rebuilding indices from scratch" to log, goes on for | |
# 30-40 minutes, then one or more of the following happens: | |
# | |
# * Erlang suddenly spawns 500+ processes, most of them idle in "D"
# ("uninterruptible sleep") process state, machine load rises to 500+ too,
# Erlang crashes after a while;
# * Erlang crashes with erl_crash.dump mentioning "eheap_alloc: Cannot allocate | |
# <usually around 5-6 GB> bytes of memory (of type "heap", thread 2).", even | |
# though more RAM is available (tried on multiple machines ranging from 16 GB | |
# to 192 GB of RAM and plenty of swap space); | |
# | |
# At no point does RabbitMQ open its own (5673) or web management (15673) ports.
# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment