isitavi/Prometheus

## Prometheus
============================================================================================
Step 1) Creating a Prometheus user and group
============================================================================================
useradd -m -s /bin/false prometheus
id prometheus

============================================================================================
Step 2) Creating configuration directories for Prometheus
============================================================================================
mkdir /etc/prometheus
mkdir /var/lib/prometheus

Set the ownership of /var/lib/prometheus to the prometheus user and group:
chown prometheus:prometheus /var/lib/prometheus/

============================================================================================
Step 3) Downloading Prometheus tar file (Install only on Prometheus Server)
============================================================================================
wget https://github.com/prometheus/prometheus/releases/download/v2.27.1/prometheus-2.27.1.linux-amd64.tar.gz -P /tmp

Extract the archive. The extracted directory contains two binaries, prometheus and promtool, which must be copied to /usr/local/bin.
cd /tmp
tar -xvf prometheus-2.27.1.linux-amd64.tar.gz
cd prometheus-2.27.1.linux-amd64
cp prometheus  /usr/local/bin
cp promtool  /usr/local/bin

============================================================================================
Step 4) Creating a configuration file for Prometheus
============================================================================================
vim /etc/prometheus/prometheus.yml

# Global config

global:

  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.

  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.

  scrape_timeout: 15s  # Time allowed for a single scrape before it fails. The default is 10s.

# A scrape configuration containing exactly one endpoint to scrape: here it's Prometheus itself.

scrape_configs:

  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.

  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'

    # scheme defaults to 'http'.

    static_configs:

    - targets: ['localhost:9090']


============================================================================================
Step 5) Creating a Systemd service file for Prometheus Server
============================================================================================
vim /etc/systemd/system/prometheus.service

[Unit]
Description=Prometheus Time Series Collection and Processing Server
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus \
    --config.file /etc/prometheus/prometheus.yml \
    --storage.tsdb.path /var/lib/prometheus/ \
    --web.console.templates=/etc/prometheus/consoles \
    --web.console.libraries=/etc/prometheus/console_libraries

[Install]
WantedBy=multi-user.target


systemctl daemon-reload
systemctl enable prometheus
systemctl start prometheus

============================================================================================
Step 6) Install Grafana
============================================================================================
https://grafana.com/grafana/download

wget https://dl.grafana.com/oss/release/grafana-8.0.1-1.x86_64.rpm
sudo yum install grafana-8.0.1-1.x86_64.rpm

If any configuration change is required, edit this file:
/etc/grafana/grafana.ini
Example

# The HTTP port  to use
;http_port = 3000


============================================================================================
Step 7) Install & Configure node_exporter on client machine
============================================================================================
useradd -m -s /bin/false node_exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz -P /tmp
cd /tmp
tar -zxpvf node_exporter-1.1.2.linux-amd64.tar.gz
cd node_exporter-1.1.2.linux-amd64
cp node_exporter /usr/local/bin
chown node_exporter:node_exporter /usr/local/bin/node_exporter
vim /etc/systemd/system/node_exporter.service
-----------------------------------------------------
[Unit]
Description=Prometheus Node Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter

[Install]
WantedBy=multi-user.target
------------------------------------------------------
systemctl daemon-reload
systemctl start node_exporter
systemctl enable node_exporter
systemctl status node_exporter

============================================================================================
Step 8) Install & Configure alertmanager
============================================================================================
wget https://github.com/prometheus/alertmanager/releases/download/v0.22.2/alertmanager-0.22.2.linux-amd64.tar.gz -P /tmp
cd /tmp
tar -xvf alertmanager-0.22.2.linux-amd64.tar.gz
cd alertmanager-0.22.2.linux-amd64
mv amtool alertmanager /usr/local/bin
sudo mkdir -p /etc/alertmanager
mv alertmanager.yml /etc/alertmanager/
mkdir -p /data/alertmanager
useradd -rs /bin/false alertmanager
chown alertmanager:alertmanager /usr/local/bin/amtool /usr/local/bin/alertmanager
chown -R alertmanager:alertmanager /data/alertmanager /etc/alertmanager/*
vim /etc/systemd/system/alertmanager.service
---------------------------------------------------
[Unit]
Description=Alertmanager
Wants=network-online.target
After=network-online.target

[Service]
User=alertmanager
Group=alertmanager
Type=simple
WorkingDirectory=/etc/alertmanager/
ExecStart=/usr/local/bin/alertmanager --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/data/alertmanager --web.external-url http://0.0.0.0:9093

[Install]
WantedBy=multi-user.target
--------------------------------------------------


prometheus.yml
==============
# Global config
global:
  scrape_interval: 10s # Set the scrape interval to every 10 seconds. Default is every 1 minute.
  evaluation_interval: 10s # Evaluate rules every 10 seconds. The default is every 1 minute.
  scrape_timeout: 10s  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Rules files should be defined here
rule_files:
  - "rules/*.yml"

scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['localhost:9090']


  - job_name: 'node-expoter'
    static_configs:
    - targets: ['node-expoter:9100']

  - job_name: 'foreman'
    static_configs:
    - targets: ['foreman:9100']


custom_rules.yml
================
groups:
  - name: linux_servers_cpu
    rules:
    - record: job:node_cpu_seconds_total:avg_rate5m
      expr: (100- (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100)

  - name: linux_servers_memory
    rules:
    - record: jobs:node_memory_MemFree_megabytes:free_mem
      expr: (( node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_Slab_bytes) / 1000000)

    - record: job:node_memory_MemAvailable_bytes:avg_rate5m
      expr: (avg_over_time(node_memory_MemAvailable_bytes[5m]) / 1000000000)


  custom_alert_rule.yml
  =====================
  groups:
  - name: custom_alert_rule_linux
    rules:
    - alert: HighCpuUsages
      expr: ((100- (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100) > 3)
      annotations:
        title:  "Instance {{ $labels.instance }} consume high CPU."
      for: 1m
      labels:
        severity: critical

    - alert: NodeExporterDown
      expr: (up{job =~".*"}) != 1
      annotations:
        title: "Instance {{ $labels.instance }} goes down."
      for: 1m
      labels:
        severity: critical


CPU Usage:
===========
https://stackoverflow.com/questions/34923788/prometheus-convert-cpu-user-seconds-to-cpu-usage
https://www.programmersought.com/article/75056384231/
https://medianetlab.gr/prometheus-101/
https://www.robustperception.io/understanding-machine-cpu-usage
https://stackoverflow.com/questions/57357532/get-total-and-free-disk-space-using-prometheus

Memory Usage:
=============
https://github.com/prometheus/node_exporter/issues/877
https://stackoverflow.com/questions/61751232/prometheus-docker-determine-available-memory-per-node-which-metric-is-correc
https://github.com/prometheus/node_exporter/issues/924
https://www.reddit.com/r/kubernetes/comments/bmi0kk/how_to_monitor_node_memory_usage_correctly/
https://www.programmersought.com/article/9586810990/
https://access.redhat.com/solutions/406773
https://stackoverflow.com/questions/48835035/average-memory-usage-query-prometheus
https://stackoverflow.com/questions/59405030/free-bytes-vs-avail-bytes-in-node-exporter
	============================================================================================
	Step 1) Creating a Prometheus user and group
	============================================================================================
	useradd -m -s /bin/false prometheus
	id prometheus

	============================================================================================
	Step 2) Creating configuration directories for Prometheus
	============================================================================================
	mkdir /etc/prometheus
	mkdir /var/lib/prometheus

	Set the ownership of /var/lib/prometheus to the prometheus user and group:
	chown prometheus:prometheus /var/lib/prometheus/

	============================================================================================
	Step 3) Downloading Prometheus tar file (Install only on Prometheus Server)
	============================================================================================
	wget https://github.com/prometheus/prometheus/releases/download/v2.27.1/prometheus-2.27.1.linux-amd64.tar.gz -P /tmp

	Extract the archive. The extracted directory contains two binaries, prometheus and promtool, which must be copied to /usr/local/bin.
	cd /tmp
	tar -xvf prometheus-2.27.1.linux-amd64.tar.gz
	cd prometheus-2.27.1.linux-amd64
	cp prometheus /usr/local/bin
	cp promtool /usr/local/bin

	============================================================================================
	Step 4) Creating a configuration file for Prometheus
	============================================================================================
	vim /etc/prometheus/prometheus.yml

	# Global config

	global:

	  scrape_interval:     15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.

	  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.

	  scrape_timeout: 15s  # Time allowed for a single scrape before it fails. The default is 10s.

	# A scrape configuration containing exactly one endpoint to scrape: here it's Prometheus itself.

	scrape_configs:

	  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.

	  - job_name: 'prometheus'

	    # metrics_path defaults to '/metrics'

	    # scheme defaults to 'http'.

	    static_configs:

	    - targets: ['localhost:9090']


	============================================================================================
	Step 5) Creating a Systemd service file for Prometheus Server
	============================================================================================
	vim /etc/systemd/system/prometheus.service

	[Unit]
	Description=Prometheus Time Series Collection and Processing Server
	Wants=network-online.target
	After=network-online.target

	[Service]
	User=prometheus
	Group=prometheus
	Type=simple
	ExecStart=/usr/local/bin/prometheus \
	--config.file /etc/prometheus/prometheus.yml \
	--storage.tsdb.path /var/lib/prometheus/ \
	--web.console.templates=/etc/prometheus/consoles \
	--web.console.libraries=/etc/prometheus/console_libraries

	[Install]
	WantedBy=multi-user.target


	systemctl daemon-reload
	systemctl enable prometheus
	systemctl start prometheus

	============================================================================================
	Step 6) Install Grafana
	============================================================================================
	https://grafana.com/grafana/download

	wget https://dl.grafana.com/oss/release/grafana-8.0.1-1.x86_64.rpm
	sudo yum install grafana-8.0.1-1.x86_64.rpm

	If any configuration change is required, edit this file:
	/etc/grafana/grafana.ini
	Example

	# The HTTP port to use
	;http_port = 3000


	============================================================================================
	Step 7) Install & Configure node_exporter on client machine
	============================================================================================
	useradd -m -s /bin/false node_exporter
	wget https://github.com/prometheus/node_exporter/releases/download/v1.1.2/node_exporter-1.1.2.linux-amd64.tar.gz -P /tmp
	cd /tmp
	tar -zxpvf node_exporter-1.1.2.linux-amd64.tar.gz
	cd node_exporter-1.1.2.linux-amd64
	cp node_exporter /usr/local/bin
	chown node_exporter:node_exporter /usr/local/bin/node_exporter
	vim /etc/systemd/system/node_exporter.service
	-----------------------------------------------------
	[Unit]
	Description=Prometheus Node Exporter
	Wants=network-online.target
	After=network-online.target

	[Service]
	User=node_exporter
	Group=node_exporter
	Type=simple
	ExecStart=/usr/local/bin/node_exporter

	[Install]
	WantedBy=multi-user.target
	------------------------------------------------------
	systemctl daemon-reload
	systemctl start node_exporter
	systemctl enable node_exporter
	systemctl status node_exporter

	============================================================================================
	Step 8) Install & Configure alertmanager
	============================================================================================
	wget https://github.com/prometheus/alertmanager/releases/download/v0.22.2/alertmanager-0.22.2.linux-amd64.tar.gz -P /tmp
	cd /tmp
	tar -xvf alertmanager-0.22.2.linux-amd64.tar.gz
	cd alertmanager-0.22.2.linux-amd64
	mv amtool alertmanager /usr/local/bin
	sudo mkdir -p /etc/alertmanager
	mv alertmanager.yml /etc/alertmanager/
	mkdir -p /data/alertmanager
	useradd -rs /bin/false alertmanager
	chown alertmanager:alertmanager /usr/local/bin/amtool /usr/local/bin/alertmanager
	chown -R alertmanager:alertmanager /data/alertmanager /etc/alertmanager/*
	vim /etc/systemd/system/alertmanager.service
	---------------------------------------------------
	[Unit]
	Description=Alertmanager
	Wants=network-online.target
	After=network-online.target

	[Service]
	User=alertmanager
	Group=alertmanager
	Type=simple
	WorkingDirectory=/etc/alertmanager/
	ExecStart=/usr/local/bin/alertmanager --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/data/alertmanager --web.external-url http://0.0.0.0:9093

	[Install]
	WantedBy=multi-user.target
	--------------------------------------------------


	prometheus.yml
	==============
	# Global config
	global:
	  scrape_interval: 10s # Set the scrape interval to every 10 seconds. Default is every 1 minute.
	  evaluation_interval: 10s # Evaluate rules every 10 seconds. The default is every 1 minute.
	  scrape_timeout: 10s # scrape_timeout is set to the global default (10s).

	# Alertmanager configuration
	alerting:
	  alertmanagers:
	  - static_configs:
	    - targets:
	      # - alertmanager:9093

	# Rules files should be defined here
	rule_files:
	  - "rules/*.yml"

	scrape_configs:
	  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
	  - job_name: 'prometheus'
	    # metrics_path defaults to '/metrics'
	    # scheme defaults to 'http'.
	    static_configs:
	    - targets: ['localhost:9090']


	  - job_name: 'node-expoter'
	    static_configs:
	    - targets: ['node-expoter:9100']

	  - job_name: 'foreman'
	    static_configs:
	    - targets: ['foreman:9100']


	custom_rules.yml
	================
	groups:
	  - name: linux_servers_cpu
	    rules:
	    - record: job:node_cpu_seconds_total:avg_rate5m
	      expr: (100- (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100)

	  - name: linux_servers_memory
	    rules:
	    - record: jobs:node_memory_MemFree_megabytes:free_mem
	      expr: (( node_memory_MemTotal_bytes - node_memory_MemFree_bytes - node_memory_Buffers_bytes - node_memory_Cached_bytes - node_memory_Slab_bytes) / 1000000)

	    - record: job:node_memory_MemAvailable_bytes:avg_rate5m
	      expr: (avg_over_time(node_memory_MemAvailable_bytes[5m]) / 1000000000)


	custom_alert_rule.yml
	=====================
	groups:
	- name: custom_alert_rule_linux
	  rules:
	  - alert: HighCpuUsages
	    expr: ((100- (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[5m]))) * 100) > 3)
	    annotations:
	      title: "Instance {{ $labels.instance }} consume high CPU."
	    for: 1m
	    labels:
	      severity: critical

	  - alert: NodeExporterDown
	    expr: (up{job =~".*"}) != 1
	    annotations:
	      title: "Instance {{ $labels.instance }} goes down."
	    for: 1m
	    labels:
	      severity: critical





















	CPU Usage:
	===========
	https://stackoverflow.com/questions/34923788/prometheus-convert-cpu-user-seconds-to-cpu-usage
	https://www.programmersought.com/article/75056384231/
	https://medianetlab.gr/prometheus-101/
	https://www.robustperception.io/understanding-machine-cpu-usage
	https://stackoverflow.com/questions/57357532/get-total-and-free-disk-space-using-prometheus

	Memory Usage:
	=============
	https://github.com/prometheus/node_exporter/issues/877
	https://stackoverflow.com/questions/61751232/prometheus-docker-determine-available-memory-per-node-which-metric-is-correc
	https://github.com/prometheus/node_exporter/issues/924
	https://www.reddit.com/r/kubernetes/comments/bmi0kk/how_to_monitor_node_memory_usage_correctly/
	https://www.programmersought.com/article/9586810990/
	https://access.redhat.com/solutions/406773
	https://stackoverflow.com/questions/48835035/average-memory-usage-query-prometheus
	https://stackoverflow.com/questions/59405030/free-bytes-vs-avail-bytes-in-node-exporter