summaryrefslogtreecommitdiffstats
path: root/roles/openshift_prometheus/templates
diff options
context:
space:
mode:
authorZohar Galor <zgalor@redhat.com>2017-06-20 17:28:07 +0300
committerZohar Galor <zgalor@redhat.com>2017-09-06 12:08:41 +0300
commitec75d0ac888f3fab87f8d335224596df045e260a (patch)
treecc685087a63d6388cb447e532d92564e9c2cc393 /roles/openshift_prometheus/templates
parentb5cf769b97691d75e07d1e0e3b29ecbc31ba32ea (diff)
downloadopenshift-ec75d0ac888f3fab87f8d335224596df045e260a.tar.gz
openshift-ec75d0ac888f3fab87f8d335224596df045e260a.tar.bz2
openshift-ec75d0ac888f3fab87f8d335224596df045e260a.tar.xz
openshift-ec75d0ac888f3fab87f8d335224596df045e260a.zip
Create ansible role for deploying prometheus on openshift
A new role for installing prometheus on openshift. Depends on `openshift_hosted_prometheus_deploy` flag role creates: - prometheus namespace - prometheus clusterrolebinding and service account - pvs for prometheus, alertmanager and alertbuffer for internal nfs - prometheus pod with prometheus behind oauth-proxy, alertmanager and alert-buffer behind oauth-proxy - prometheus and alertmanager configmaps - prometheus and alerts services and direct routes - prometheus, alertmanager and alert-buffer pvcs
Diffstat (limited to 'roles/openshift_prometheus/templates')
-rw-r--r--roles/openshift_prometheus/templates/alertmanager.yml.j220
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prom-pv-server.yml.j215
-rw-r--r--roles/openshift_prometheus/templates/prometheus.rules.j24
-rw-r--r--roles/openshift_prometheus/templates/prometheus.yml.j2174
-rw-r--r--roles/openshift_prometheus/templates/prometheus_deployment.j2240
7 files changed, 483 insertions, 0 deletions
diff --git a/roles/openshift_prometheus/templates/alertmanager.yml.j2 b/roles/openshift_prometheus/templates/alertmanager.yml.j2
new file mode 100644
index 000000000..6c432a3d0
--- /dev/null
+++ b/roles/openshift_prometheus/templates/alertmanager.yml.j2
@@ -0,0 +1,20 @@
+global:
+
+# The root route on which each incoming alert enters.
+route:
+ # default route if none match
+ receiver: alert-buffer-wh
+
+ # The labels by which incoming alerts are grouped together. For example,
+ # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
+ # be batched into a single group.
+ # TODO:
+ group_by: []
+
+ # All the above attributes are inherited by all child routes and can
+ # overwritten on each.
+
+receivers:
+- name: alert-buffer-wh
+ webhook_configs:
+ - url: http://localhost:9099/topics/alerts
diff --git a/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2
new file mode 100644
index 000000000..55a5e19c3
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prom-pv-alertbuffer.yml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus-alertbuffer
+ labels:
+ storage: prometheus-alertbuffer
+spec:
+ capacity:
+ storage: 15Gi
+ accessModes:
+ - ReadWriteOnce
+ nfs:
+ path: /exports/prometheus-alertbuffer
+ server: {{ openshift_prometheus_nfs_server }}
+ persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2
new file mode 100644
index 000000000..4ee518735
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prom-pv-alertmanager.yml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus-alertmanager
+ labels:
+ storage: prometheus-alertmanager
+spec:
+ capacity:
+ storage: 15Gi
+ accessModes:
+ - ReadWriteOnce
+ nfs:
+ path: /exports/prometheus-alertmanager
+ server: {{ openshift_prometheus_nfs_server }}
+ persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prom-pv-server.yml.j2 b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2
new file mode 100644
index 000000000..933bf0f60
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prom-pv-server.yml.j2
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+ name: prometheus
+ labels:
+ storage: prometheus
+spec:
+ capacity:
+ storage: 15Gi
+ accessModes:
+ - ReadWriteOnce
+ nfs:
+ path: /exports/prometheus
+ server: {{ openshift_prometheus_nfs_server }}
+ persistentVolumeReclaimPolicy: Retain
diff --git a/roles/openshift_prometheus/templates/prometheus.rules.j2 b/roles/openshift_prometheus/templates/prometheus.rules.j2
new file mode 100644
index 000000000..e861dc127
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.rules.j2
@@ -0,0 +1,4 @@
+groups:
+- name: example-rules
+ interval: 30s # defaults to global interval
+ rules:
diff --git a/roles/openshift_prometheus/templates/prometheus.yml.j2 b/roles/openshift_prometheus/templates/prometheus.yml.j2
new file mode 100644
index 000000000..63430f834
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus.yml.j2
@@ -0,0 +1,174 @@
+rule_files:
+ - 'prometheus.rules'
+{% if openshift_prometheus_additional_rules_file is defined and openshift_prometheus_additional_rules_file is not none %}
+ - 'prometheus.additional.rules'
+{% endif %}
+
+
+
+# A scrape configuration for running Prometheus on a Kubernetes cluster.
+# This uses separate scrape configs for cluster components (i.e. API server, node)
+# and services to allow each to use different authentication configs.
+#
+# Kubernetes labels will be added as Prometheus labels on metrics via the
+# `labelmap` relabeling action.
+
+# Scrape config for API servers.
+#
+# Kubernetes exposes API servers as endpoints to the default/kubernetes
+# service so this uses `endpoints` role and uses relabelling to only keep
+# the endpoints associated with the default/kubernetes service using the
+# default named port `https`. This works for single API server deployments as
+# well as HA API server deployments.
+scrape_configs:
+- job_name: 'kubernetes-apiservers'
+
+ kubernetes_sd_configs:
+ - role: endpoints
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+ # Keep only the default/kubernetes service endpoints for the https port. This
+ # will add targets for each API server which Kubernetes adds an endpoint to
+ # the default/kubernetes service.
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+ action: keep
+ regex: default;kubernetes;https
+
+# Scrape config for nodes.
+#
+# Each node exposes a /metrics endpoint that contains operational metrics for
+# the Kubelet and other components.
+- job_name: 'kubernetes-nodes'
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+ kubernetes_sd_configs:
+ - role: node
+
+ relabel_configs:
+ - action: labelmap
+ regex: __meta_kubernetes_node_label_(.+)
+
+# Scrape config for controllers.
+#
+# Each master node exposes a /metrics endpoint on :8444 that contains operational metrics for
+# the controllers.
+#
+# TODO: move this to a pure endpoints based metrics gatherer when controllers are exposed via
+# endpoints.
+- job_name: 'kubernetes-controllers'
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+ kubernetes_sd_configs:
+ - role: endpoints
+
+ # Keep only the default/kubernetes service endpoints for the https port, and then
+ # set the port to 8444. This is the default configuration for the controllers on OpenShift
+ # masters.
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
+ action: keep
+ regex: default;kubernetes;https
+ - source_labels: [__address__]
+ action: replace
+ target_label: __address__
+ regex: (.+)(?::\d+)
+ replacement: $1:8444
+
+# Scrape config for cAdvisor.
+#
+# Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
+# reports container metrics for each running pod. Scrape those by default.
+- job_name: 'kubernetes-cadvisor'
+
+ scheme: https
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
+
+{% if kubernetes_version | float() >= 1.7 | float() %}
+ metrics_path: /metrics/cadvisor
+{% else %}
+ metrics_path: /metrics
+{% endif %}
+
+ kubernetes_sd_configs:
+ - role: node
+
+ relabel_configs:
+ - action: labelmap
+ regex: __meta_kubernetes_node_label_(.+)
+
+# Scrape config for service endpoints.
+#
+# The relabeling allows the actual service scrape endpoint to be configured
+# via the following annotations:
+#
+# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
+# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
+# to set this to `https` & most likely set the `tls_config` of the scrape config.
+# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
+# * `prometheus.io/port`: If the metrics are exposed on a different port to the
+# service then set this appropriately.
+- job_name: 'kubernetes-service-endpoints'
+
+ tls_config:
+ ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
+ # TODO: this should be per target
+ insecure_skip_verify: true
+
+ kubernetes_sd_configs:
+ - role: endpoints
+
+ relabel_configs:
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
+ action: keep
+ regex: true
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
+ action: replace
+ target_label: __scheme__
+ regex: (https?)
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
+ action: replace
+ target_label: __metrics_path__
+ regex: (.+)
+ - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
+ action: replace
+ target_label: __address__
+ regex: (.+)(?::\d+);(\d+)
+ replacement: $1:$2
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_username]
+ action: replace
+ target_label: __basic_auth_username__
+ regex: (.+)
+ - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_password]
+ action: replace
+ target_label: __basic_auth_password__
+ regex: (.+)
+ - action: labelmap
+ regex: __meta_kubernetes_service_label_(.+)
+ - source_labels: [__meta_kubernetes_namespace]
+ action: replace
+ target_label: kubernetes_namespace
+ - source_labels: [__meta_kubernetes_service_name]
+ action: replace
+ target_label: kubernetes_name
+
+alerting:
+ alertmanagers:
+ - scheme: http
+ static_configs:
+ - targets:
+ - "localhost:9093"
diff --git a/roles/openshift_prometheus/templates/prometheus_deployment.j2 b/roles/openshift_prometheus/templates/prometheus_deployment.j2
new file mode 100644
index 000000000..98c117f19
--- /dev/null
+++ b/roles/openshift_prometheus/templates/prometheus_deployment.j2
@@ -0,0 +1,240 @@
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+ name: prometheus
+ namespace: {{ namespace }}
+ labels:
+ app: prometheus
+spec:
+ replicas: {{ prom_replicas|default(1) }}
+ selector:
+ provider: openshift
+ matchLabels:
+ app: prometheus
+ template:
+ metadata:
+ name: prometheus
+ labels:
+ app: prometheus
+ spec:
+ serviceAccountName: prometheus
+{% if openshift_prometheus_node_selector is iterable and openshift_prometheus_node_selector | length > 0 %}
+ nodeSelector:
+{% for key, value in openshift_prometheus_node_selector.iteritems() %}
+ {{key}}: "{{value}}"
+{% endfor %}
+{% endif %}
+ containers:
+ # Deploy Prometheus behind an oauth proxy
+ - name: prom-proxy
+ image: "{{ openshift_prometheus_image_proxy }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_memory_requests_limit_proxy is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_limit}}"
+{% endif %}
+ ports:
+ - containerPort: 8443
+ name: web
+ args:
+ - -provider=openshift
+ - -https-address=:8443
+ - -http-address=
+ - -email-domain=*
+ - -upstream=http://localhost:9090
+ - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+ - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+ - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ - -skip-auth-regex=^/metrics
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: prometheus-tls
+ - mountPath: /etc/proxy/secrets
+ name: prometheus-secrets
+ - mountPath: /prometheus
+ name: prometheus-data
+
+ - name: prometheus
+ args:
+ - --storage.tsdb.retention=6h
+ - --config.file=/etc/prometheus/prometheus.yml
+ - --web.listen-address=localhost:9090
+ image: "{{ openshift_prometheus_image_prometheus }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_memory_requests is defined and openshift_prometheus_memory_requests is not none %}
+ memory: "{{openshift_prometheus_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_cpu_requests is defined and openshift_prometheus_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_memory_limit is defined and openshift_prometheus_memory_limit is not none %}
+ memory: "{{ openshift_prometheus_memory_limit }}"
+{% endif %}
+{% if openshift_prometheus_cpu_limit is defined and openshift_prometheus_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_cpu_limit}}"
+{% endif %}
+
+ volumeMounts:
+ - mountPath: /etc/prometheus
+ name: prometheus-config
+ - mountPath: /prometheus
+ name: prometheus-data
+
+ # Deploy alertmanager behind prometheus-alert-buffer behind an oauth proxy
+ - name: alerts-proxy
+ image: "{{ openshift_prometheus_image_proxy }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_oauth_proxy_memory_requests is defined and openshift_prometheus_oauth_proxy_memory_requests is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_requests is defined and openshift_prometheus_oauth_proxy_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_oauth_proxy_memory_limit is defined and openshift_prometheus_oauth_proxy_memory_limit is not none %}
+ memory: "{{openshift_prometheus_oauth_proxy_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_oauth_proxy_cpu_limit is defined and openshift_prometheus_oauth_proxy_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_oauth_proxy_cpu_limit}}"
+{% endif %}
+ ports:
+ - containerPort: 9443
+ name: web
+ args:
+ - -provider=openshift
+ - -https-address=:9443
+ - -http-address=
+ - -email-domain=*
+ - -upstream=http://localhost:9099
+ - -client-id=system:serviceaccount:{{ namespace }}:prometheus
+ - '-openshift-sar={"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}'
+ - '-openshift-delegate-urls={"/": {"resource": "namespaces", "verb": "get", "resourceName": "{{ namespace }}", "namespace": "{{ namespace }}"}}'
+ - -tls-cert=/etc/tls/private/tls.crt
+ - -tls-key=/etc/tls/private/tls.key
+ - -client-secret-file=/var/run/secrets/kubernetes.io/serviceaccount/token
+ - -cookie-secret-file=/etc/proxy/secrets/session_secret
+ volumeMounts:
+ - mountPath: /etc/tls/private
+ name: alerts-tls
+ - mountPath: /etc/proxy/secrets
+ name: alerts-secrets
+
+ - name: alert-buffer
+ args:
+ - --storage-path=/alert-buffer/messages.db
+ image: "{{ openshift_prometheus_image_alertbuffer }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_alertbuffer_memory_requests is defined and openshift_prometheus_alertbuffer_memory_requests is not none %}
+ memory: "{{openshift_prometheus_alertbuffer_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_alertbuffer_cpu_requests is defined and openshift_prometheus_alertbuffer_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_alertbuffer_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_alertbuffer_memory_limit is defined and openshift_prometheus_alertbuffer_memory_limit is not none %}
+ memory: "{{openshift_prometheus_alertbuffer_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_alertbuffer_cpu_limit is defined and openshift_prometheus_alertbuffer_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_alertbuffer_cpu_limit}}"
+{% endif %}
+ volumeMounts:
+ - mountPath: /alert-buffer
+ name: alert-buffer-data
+ ports:
+ - containerPort: 9099
+ name: alert-buf
+
+ - name: alertmanager
+ args:
+ - -config.file=/etc/alertmanager/alertmanager.yml
+ image: "{{ openshift_prometheus_image_alertmanager }}"
+ imagePullPolicy: IfNotPresent
+ resources:
+ requests:
+{% if openshift_prometheus_alertmanager_memory_requests is defined and openshift_prometheus_alertmanager_memory_requests is not none %}
+ memory: "{{openshift_prometheus_alertmanager_memory_requests}}"
+{% endif %}
+{% if openshift_prometheus_alertmanager_cpu_requests is defined and openshift_prometheus_alertmanager_cpu_requests is not none %}
+ cpu: "{{openshift_prometheus_alertmanager_cpu_requests}}"
+{% endif %}
+ limits:
+{% if openshift_prometheus_alertmanager_memory_limit is defined and openshift_prometheus_alertmanager_memory_limit is not none %}
+ memory: "{{openshift_prometheus_alertmanager_memory_limit}}"
+{% endif %}
+{% if openshift_prometheus_alertmanager_cpu_limit is defined and openshift_prometheus_alertmanager_cpu_limit is not none %}
+ cpu: "{{openshift_prometheus_alertmanager_cpu_limit}}"
+{% endif %}
+ ports:
+ - containerPort: 9093
+ name: web
+ volumeMounts:
+ - mountPath: /etc/alertmanager
+ name: alertmanager-config
+ - mountPath: /alertmanager
+ name: alertmanager-data
+
+ restartPolicy: Always
+ volumes:
+ - name: prometheus-config
+ configMap:
+ defaultMode: 420
+ name: prometheus
+ - name: prometheus-secrets
+ secret:
+ secretName: prometheus-proxy
+ - name: prometheus-tls
+ secret:
+ secretName: prometheus-tls
+ - name: prometheus-data
+{% if openshift_prometheus_storage_type == 'pvc' %}
+ persistentVolumeClaim:
+ claimName: {{ openshift_prometheus_pvc_name }}
+{% else %}
+ emptydir: {}
+{% endif %}
+ - name: alertmanager-config
+ configMap:
+ defaultMode: 420
+ name: prometheus-alerts
+ - name: alerts-secrets
+ secret:
+ secretName: alerts-proxy
+ - name: alerts-tls
+ secret:
+ secretName: prometheus-alerts-tls
+ - name: alertmanager-data
+{% if openshift_prometheus_alertmanager_storage_type == 'pvc' %}
+ persistentVolumeClaim:
+ claimName: {{ openshift_prometheus_alertmanager_pvc_name }}
+{% else %}
+ emptydir: {}
+{% endif %}
+ - name: alert-buffer-data
+{% if openshift_prometheus_alertbuffer_storage_type == 'pvc' %}
+ persistentVolumeClaim:
+ claimName: {{ openshift_prometheus_alertbuffer_pvc_name }}
+{% else %}
+ emptydir: {}
+{% endif %}