Skip to content

Instantly share code, notes, and snippets.

@skahler-yuga
Last active April 5, 2021 20:31
Show Gist options
  • Save skahler-yuga/1733b5846e0b20c8748773b307a0af76 to your computer and use it in GitHub Desktop.
Save skahler-yuga/1733b5846e0b20c8748773b307a0af76 to your computer and use it in GitHub Desktop.
prometheus won't start

The Prometheus container won't start: every time it is launched it exits again almost immediately.

[root@yugahome01 yugabyte]# docker ps -f "status=exited"
CONTAINER ID        IMAGE                                       COMMAND                  CREATED             STATUS                      PORTS               NAMES
ad1a88de0a24        192.168.10.10:9874/prom-prometheus:v2.2.1   "/bin/prometheus --c…"   21 minutes ago      Exited (0) 21 minutes ago                       prometheus
[root@yugahome01 yugabyte]# docker logs ad1a88de0a24
level=info ts=2021-03-30T22:12:59.367341272Z caller=main.go:220 msg="Starting Prometheus" version="(version=2.2.1, branch=HEAD, revision=bc6058c81272a8d938c05e75607371284236aadc)"
level=info ts=2021-03-30T22:12:59.367487198Z caller=main.go:221 build_context="(go=go1.10, user=root@149e5b3f0829, date=20180314-14:15:45)"
level=info ts=2021-03-30T22:12:59.3675326Z caller=main.go:222 host_details="(Linux 3.10.0-1160.15.2.el7.x86_64 #1 SMP Wed Feb 3 15:06:38 UTC 2021 x86_64 ad1a88de0a24 (none))"
level=info ts=2021-03-30T22:12:59.367578803Z caller=main.go:223 fd_limits="(soft=1048576, hard=1048576)"
level=info ts=2021-03-30T22:12:59.373347848Z caller=web.go:382 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2021-03-30T22:12:59.372918441Z caller=main.go:504 msg="Starting TSDB ..."
level=info ts=2021-03-30T22:12:59.382540725Z caller=main.go:398 msg="Stopping scrape discovery manager..."
level=info ts=2021-03-30T22:12:59.38262419Z caller=main.go:411 msg="Stopping notify discovery manager..."
level=info ts=2021-03-30T22:12:59.382654141Z caller=main.go:432 msg="Stopping scrape manager..."
level=info ts=2021-03-30T22:12:59.382695646Z caller=manager.go:460 component="rule manager" msg="Stopping rule manager..."
level=info ts=2021-03-30T22:12:59.382759187Z caller=manager.go:466 component="rule manager" msg="Rule manager stopped"
level=info ts=2021-03-30T22:12:59.382790641Z caller=notifier.go:512 component=notifier msg="Stopping notification manager..."
level=info ts=2021-03-30T22:12:59.383085834Z caller=main.go:394 msg="Scrape discovery manager stopped"
level=info ts=2021-03-30T22:12:59.383149422Z caller=main.go:407 msg="Notify discovery manager stopped"
level=info ts=2021-03-30T22:12:59.383197422Z caller=main.go:426 msg="Scrape manager stopped"
level=info ts=2021-03-30T22:12:59.383259255Z caller=main.go:573 msg="Notifier manager stopped"
level=error ts=2021-03-30T22:12:59.390758147Z caller=main.go:582 err="Opening storage failed open DB in /prometheus: Locked by other process"
level=info ts=2021-03-30T22:12:59.39088373Z caller=main.go:584 msg="See you next time!"

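The problem is a leftover `lock` file in the Prometheus storage directory (hence the "Locked by other process" error in the log above). After confirming that no other Prometheus instance is still using that directory, remove the stale lock file and start the container again: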

[root@yugahome01 yugabyte]# cd /opt/yugabyte/
[root@yugahome01 yugabyte]# ls
postgresql  prometheus_configs  prometheusv2  releases  yugaware
[root@yugahome01 yugabyte]# cd prometheusv2/
[root@yugahome01 prometheusv2]# ls -l
total 4
drwxr-xr-x 3 65534 65534 64 Mar 17 12:00 01F10J2MR1EEBJ4W125EVZ7FZB
drwxr-xr-x 3 65534 65534 64 Mar 18 04:00 01F1290ETXPE0MN2V52BFHY3TK
drwxr-xr-x 3 65534 65534 64 Mar 18 22:00 01F146T02BSC8R62NSHH03G4VC
drwxr-xr-x 3 65534 65534 64 Mar 19 16:00 01F164KHBN5G91DD3HQ69MQTNB
drwxr-xr-x 3 65534 65534 64 Mar 20 10:00 01F182D2JMMEQXMD651W8ENA7W
drwxr-xr-x 3 65534 65534 64 Mar 21 04:00 01F1A06KVW62V7C44EJEJN4S38
drwxr-xr-x 3 65534 65534 64 Mar 21 22:00 01F1BY0543KV8Z7P871PGXDCV2
drwxr-xr-x 3 65534 65534 64 Mar 22 16:00 01F1DVSPC7J177NTA5HQH4A78S
drwxr-xr-x 3 65534 65534 64 Mar 23 10:00 01F1FSK7KWY28952MEAK3925Q4
drwxr-xr-x 3 65534 65534 64 Mar 24 04:00 01F1HQCRVW69CSV0R3GRSYTY4B
drwxr-xr-x 3 65534 65534 64 Mar 24 22:00 01F1KN6A4VARK1WF2HY92F5EPN
drwxr-xr-x 3 65534 65534 64 Mar 25 16:00 01F1NJZVC0QMXNJYN3HWBV1SWJ
drwxr-xr-x 3 65534 65534 64 Mar 26 10:00 01F1QGSCK30AC25ANN35QP7F00
drwxr-xr-x 3 65534 65534 64 Mar 27 04:00 01F1SEJXVKVZMT3156ZVPDQ9RP
drwxr-xr-x 3 65534 65534 64 Mar 27 22:00 01F1VCCF35YWN0B0T2F0F26FNM
drwxr-xr-x 3 65534 65534 64 Mar 28 16:00 01F1XA60CPYB7QZCP60G065SV3
drwxr-xr-x 3 65534 65534 64 Mar 29 10:00 01F1Z7ZHMSMNJ1TERPFZ7REE41
drwxr-xr-x 3 65534 65534 64 Mar 30 04:00 01F215RYW92ES2HH9FF0AXF2CX
drwxr-xr-x 3 65534 65534 64 Mar 30 10:00 01F21TC2FSJFGEFGBYC5KYQ23R
drwxr-xr-x 3 65534 65534 64 Mar 30 10:00 01F21TC346W7WGPSAPW2WR18JJ
-rw------- 1 65534 65534  2 Mar 16 18:44 lock
drwxr-xr-x 2 65534 65534 32 Mar 30 10:00 wal
[root@yugahome01 prometheusv2]# rm lock
rm: remove regular file 'lock'? y
[root@yugahome01 yugabyte]# docker start ad1a88de0a24
ad1a88de0a24
[root@yugahome01 yugabyte]# docker ps --all
CONTAINER ID        IMAGE                                                            COMMAND                  CREATED             STATUS              PORTS                                                                                                NAMES
362ec8468ede        192.168.10.10:9874/nginx:1.13.1                                  "nginx -g 'daemon of…"   25 minutes ago      Up 25 minutes       0.0.0.0:80->80/tcp                                                                                   nginx
7b5a82188c7e        192.168.10.10:9874/yugabyte-yugaware:2.4.2.0-b25                 "bin/yugaware -Dconf…"   25 minutes ago      Up 25 minutes       0.0.0.0:9000->9000/tcp                                                                               yugaware
f9a67026514b        192.168.10.10:9874/postgres:9.6                                  "docker-entrypoint.s…"   25 minutes ago      Up 25 minutes       0.0.0.0:5432->5432/tcp                                                                               postgres
ad1a88de0a24        192.168.10.10:9874/prom-prometheus:v2.2.1                        "/bin/prometheus --c…"   25 minutes ago      Up 19 seconds       0.0.0.0:9090->9090/tcp                                                                               prometheus
3a11fb9519b9        registry.replicated.com/library/statsd-graphite:1.1.7-20210108   "/usr/bin/supervisor…"   25 minutes ago      Up 25 minutes       0.0.0.0:32785->2003/tcp, 0.0.0.0:32784->2004/tcp, 0.0.0.0:32783->2443/tcp, 0.0.0.0:32773->8125/udp   replicated-statsd
83537a54d80d        registry.replicated.com/library/retraced:1.3.42                  "/src/replicated-aud…"   13 days ago         Up 13 days          3000/tcp                                                                                             retraced-processor
e8c394a9181a        registry.replicated.com/library/retraced:1.3.42                  "/src/replicated-aud…"   13 days ago         Up 13 days          0.0.0.0:9873->3000/tcp                                                                               retraced-api
0a6bc4e9a70b        registry.replicated.com/library/retraced:1.3.42                  "/bin/sh -c '/usr/lo…"   13 days ago         Up 13 days          3000/tcp                                                                                             retraced-cron
47c735f13a54        registry.replicated.com/library/retraced-postgres:1.3.42         "docker-entrypoint.s…"   13 days ago         Up 13 days          5432/tcp                                                                                             retraced-postgres
9c93179a19c5        registry.replicated.com/library/retraced-nsq:1.3.42              "/bin/sh -c nsqd"        13 days ago         Up 13 days          4150-4151/tcp, 4160-4161/tcp, 4170-4171/tcp                                                          retraced-nsqd
d6d9b30e68e9        registry.replicated.com/library/premkit:v1.3.8                   "/usr/bin/premkit da…"   13 days ago         Up 13 days          80/tcp, 443/tcp, 2080/tcp, 0.0.0.0:9880->2443/tcp                                                    replicated-premkit
eb354401a219        replicated/replicated-operator:current                           "/usr/bin/replicated…"   13 days ago         Up 13 days                                                                                                               replicated-operator
e5d26f4b24f7        replicated/replicated-ui:current                                 "/usr/bin/replicated…"   13 days ago         Up 13 days          0.0.0.0:8800->8800/tcp                                                                               replicated-ui
88e8043c0e6c        replicated/replicated:current                                    "/usr/bin/entrypoint…"   13 days ago         Up 13 days          0.0.0.0:9874-9879->9874-9879/tcp                                                                     replicated
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment