Last active
June 14, 2021 04:43
-
-
Save isitavi/7ed859f5cfdf0934c9286e269026cc86 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
============================================================================================ | |
Step 1) Creating a Prometheus user and group | |
============================================================================================ | |
useradd -m -s /bin/false prometheus | |
id prometheus | |
============================================================================================ | |
Step 2) Creating configuration directories for Prometheus | |
============================================================================================ | |
mkdir /etc/prometheus | |
mkdir /var/lib/prometheus | |
Set the ownership on /var/lib/prometheus | |
chown prometheus:prometheus /var/lib/prometheus/ | |
============================================================================================ | |
Step 3) Downloading Prometheus tar file (Install only on Prometheus Server) | |
============================================================================================ | |
wget https://github.com/prometheus/prometheus/releases/download/v2.27.1/prometheus-2.27.1.linux-amd64.tar.gz -P /tmp | |
Extract the archive first (cd /tmp && tar -xvf prometheus-2.27.1.linux-amd64.tar.gz). The extracted directory contains two binaries, prometheus and promtool, which need to be copied to /usr/local/bin. | |
cd prometheus-2.27.1.linux-amd64 | |
cp prometheus /usr/local/bin | |
cp promtool /usr/local/bin | |
============================================================================================ | |
Step 4) Creating a configuration file for Prometheus | |
============================================================================================ | |
vim /etc/prometheus/prometheus.yml | |
# Global config | |
global: | |
scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. | |
evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. | |
scrape_timeout: 15s # Set the scrape timeout to 15 seconds. The default is 10 seconds. | |
# A scrape configuration containing exactly one endpoint to scrape: here it's Prometheus itself. | |
scrape_configs: | |
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. | |
- job_name: 'prometheus' | |
# metrics_path defaults to '/metrics' | |
# scheme defaults to 'http'. | |
static_configs: | |
- targets: ['localhost:9090'] | |
============================================================================================ | |
Step 5) Creating a Systemd service file for Prometheus Server | |
============================================================================================ | |
vim /etc/systemd/system/prometheus.service | |
[Unit] | |
Description=Prometheus Time Series Collection and Processing Server | |
Wants=network-online.target | |
After=network-online.target | |
[Service] | |
User=prometheus | |
Group=prometheus | |
Type=simple | |
ExecStart=/usr/local/bin/prometheus \ | |
--config.file /etc/prometheus/prometheus.yml \ | |
--storage.tsdb.path /var/lib/prometheus/ \ | |
--web.console.templates=/etc/prometheus/consoles \ | |
--web.console.libraries=/etc/prometheus/console_libraries | |
[Install] | |
WantedBy=multi-user.target | |
systemctl daemon-reload | |
systemctl enable prometheus | |
systemctl start prometheus | |
============================================================================================ | |
Step 5) Install Grafana | |
============================================================================================ | |
https://grafana.com/grafana/download | |
wget https://dl.grafana.com/oss/release/grafana-8.0.1-1.x86_64.rpm | |
sudo yum install grafana-8.0.1-1.x86_64.rpm | |
If any configuration changes are required, edit this file: | |
/etc/grafana/grafana.ini | |
Example | |
# The HTTP port to use | |
;http_port = 3000 | |
============================================================================================ | |
Step 5) Install & Configure node_exporter on client machine | |
============================================================================================ | |
useradd -m -s /bin/false node_exporter | |
wget https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz -P /tmp | |
cd /tmp | |
tar -zxpvf node_exporter-1.1.2.linux-amd64.tar.gz | |
cd node_exporter-1.1.2.linux-amd64 | |
cp node_exporter /usr/local/bin | |
chown node_exporter:node_exporter /usr/local/bin/node_exporter | |
vim /etc/systemd/system/node_exporter.service | |
----------------------------------------------------- | |
[Unit] | |
Description=Prometheus Node Exporter | |
Wants=network-online.target | |
After=network-online.target | |
[Service] | |
User=node_exporter | |
Group=node_exporter | |
Type=simple | |
ExecStart=/usr/local/bin/node_exporter | |
[Install] | |
WantedBy=multi-user.target | |
------------------------------------------------------ | |
systemctl daemon-reload | |
systemctl start node_exporter | |
systemctl enable node_exporter | |
systemctl status node_exporter | |
============================================================================================ | |
Step 6) Install & Configure alertmanager | |
============================================================================================ | |
wget https://github.com/prometheus/alertmanager/releases/download/v0.22.2/alertmanager-0.22.2.linux-amd64.tar.gz -P /tmp | |
cd /tmp | |
tar -xvf alertmanager-0.22.2.linux-amd64.tar.gz | |
cd alertmanager-0.22.2.linux-amd64 | |
mv amtool alertmanager /usr/local/bin | |
sudo mkdir -p /etc/alertmanager | |
mv alertmanager.yml /etc/alertmanager/ | |
mkdir -p /data/alertmanager | |
useradd -rs /bin/false alertmanager | |
chown alertmanager:alertmanager /usr/local/bin/amtool /usr/local/bin/alertmanager | |
chown -R alertmanager:alertmanager /data/alertmanager /etc/alertmanager/* | |
vim /etc/systemd/system/alertmanager.service | |
--------------------------------------------------- | |
[Unit] | |
Description=Alertmanager | |
Wants=network-online.target | |
After=network-online.target | |
[Service] | |
User=alertmanager | |
Group=alertmanager | |
Type=simple | |
WorkingDirectory=/etc/alertmanager/ | |
ExecStart=/usr/local/bin/alertmanager --config.file=/etc/alertmanager/alertmanager.yml --web.external-url http://0.0.0.0:9093 | |
[Install] | |
WantedBy=multi-user.target | |
-------------------------------------------------- | |
prometheus.yml | |
============== | |
# Global config | |
global: | |
scrape_interval: 10s # Set the scrape interval to every 10 seconds. Default is every 1 minute. | |
evaluation_interval: 10s # Evaluate rules every 10 seconds. The default is every 1 minute. | |
scrape_timeout: 10s # scrape_timeout is set to the global default (10s). | |
# Alertmanager configuration | |
alerting: | |
alertmanagers: | |
- static_configs: | |
- targets: | |
# - alertmanager:9093 | |
# Rules files should be defined here | |
rule_files: | |
- "rules/*.yml" | |
scrape_configs: | |
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config. | |
- job_name: 'prometheus' | |
# metrics_path defaults to '/metrics' | |
# scheme defaults to 'http'. | |
static_configs: | |
- targets: ['localhost:9090'] | |
- job_name: 'node-exporter' | |
static_configs: | |
- targets: ['node-exporter:9100'] | |
- job_name: 'foreman' | |
static_configs: | |
- targets: ['foreman:9100'] | |
custom_rules.yml | |
================ | |
groups: | |
- name: linux_servers_cpu | |
rules: | |
- record: job:node_cpu_seconds_total:avg_rate5m | |
expr: (100- (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100) | |
- name: linux_servers_memory | |
rules: | |
- record: jobs:node_memory_MemFree_megabytes:free_mem | |
expr: (( node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_Slab_bytes) / 1000000) | |
- record: job:node_memory_MemAvailable_bytes:avg_rate5m | |
expr: (avg_over_time(node_memory_MemAvailable_bytes[5m]) / 1000000000) | |
custom_alert_rule.yml | |
===================== | |
groups: | |
- name: custom_alert_rule_linux | |
rules: | |
- alert: HighCpuUsages | |
expr: ((100- (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100) > 3) | |
annotations: | |
title: "Instance {{ $labels.instance }} consume high CPU." | |
for: 1m | |
labels: | |
severity: critical | |
- alert: NodeExporterDown | |
expr: (up{job =~".*"}) != 1 | |
annotations: | |
title: "Instance {{ $labels.instance }} goes down." | |
for: 1m | |
labels: | |
severity: critical | |
CPU Usage: | |
=========== | |
https://stackoverflow.com/questions/34923788/prometheus-convert-cpu-user-seconds-to-cpu-usage | |
https://www.programmersought.com/article/75056384231/ | |
https://medianetlab.gr/prometheus-101/ | |
https://www.robustperception.io/understanding-machine-cpu-usage | |
https://stackoverflow.com/questions/57357532/get-total-and-free-disk-space-using-prometheus | |
Memory Usage: | |
============= | |
https://github.com/prometheus/node_exporter/issues/877 | |
https://stackoverflow.com/questions/61751232/prometheus-docker-determine-available-memory-per-node-which-metric-is-correc | |
https://github.com/prometheus/node_exporter/issues/924 | |
https://www.reddit.com/r/kubernetes/comments/bmi0kk/how_to_monitor_node_memory_usage_correctly/ | |
https://www.programmersought.com/article/9586810990/ | |
https://access.redhat.com/solutions/406773 | |
https://stackoverflow.com/questions/48835035/average-memory-usage-query-prometheus | |
https://stackoverflow.com/questions/59405030/free-bytes-vs-avail-bytes-in-node-exporter | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For PromQL