# Prometheus server configuration, rendered from a Jinja2 template.
# The template reads one variable, `kubernetes_version`, to pick the cAdvisor
# metrics path (see the `kubernetes-cadvisor` job).

rule_files:
  - '*.rules'

# A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server,
# node) and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.

scrape_configs:
  # Scrape config for API servers.
  #
  # Kubernetes exposes API servers as endpoints of the default/kubernetes
  # service, so this uses the `endpoints` role and uses relabeling to keep only
  # the endpoints associated with the default/kubernetes service on the
  # default named port `https`. This works for single API server deployments
  # as well as HA API server deployments.
  - job_name: 'kubernetes-apiservers'

    kubernetes_sd_configs:
      - role: endpoints

    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # Keep only the default/kubernetes service endpoints for the https port.
    # This adds one target for each API server that Kubernetes registers as an
    # endpoint of the default/kubernetes service.
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: 'default;kubernetes;https'

  # Scrape config for controllers.
  #
  # Each master node exposes a /metrics endpoint on :8444 that contains
  # operational metrics for the controllers.
  - job_name: 'kubernetes-controllers'

    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    kubernetes_sd_configs:
      - role: endpoints

    # Keep only the default/kubernetes service endpoints for the https port,
    # and then set the port to 8444. This is the default configuration for the
    # controllers on OpenShift masters.
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: 'default;kubernetes;https'
      # Rewrite "<host>:<port>" to "<host>:8444".
      - source_labels: [__address__]
        action: replace
        target_label: __address__
        regex: '(.+)(?::\d+)'
        replacement: '$1:8444'

  # Scrape config for nodes.
  #
  # Each node exposes a /metrics endpoint that contains operational metrics for
  # the Kubelet and other components.
  - job_name: 'kubernetes-nodes'

    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    kubernetes_sd_configs:
      - role: node

    # Drop a very high cardinality metric that is incorrect in 3.7. It will be
    # fixed in 3.9.
    metric_relabel_configs:
      - source_labels: [__name__]
        action: drop
        regex: 'openshift_sdn_pod_(setup|teardown)_latency(.*)'

    # Copy every Kubernetes node label onto the target as a Prometheus label.
    relabel_configs:
      - action: labelmap
        regex: '__meta_kubernetes_node_label_(.+)'

  # Scrape config for cAdvisor.
  #
  # Beginning in Kube 1.7, each node exposes a /metrics/cadvisor endpoint that
  # reports container metrics for each running pod. Scrape those by default.
  - job_name: 'kubernetes-cadvisor'

    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    # The version is compared as (major, minor) integers rather than as a
    # float: as a float, "1.10" collapses to 1.1 and would wrongly be treated
    # as older than 1.7, and "1.7.6" cannot be converted at all.
{% set kube_version_parts = (kubernetes_version | string).split('.') %}
{% set kube_major = kube_version_parts[0] | int %}
{% set kube_minor = kube_version_parts[1] | default(0) | int %}
{% if kube_major > 1 or (kube_major == 1 and kube_minor >= 7) %}
    metrics_path: /metrics/cadvisor
{% else %}
    metrics_path: /metrics
{% endif %}

    kubernetes_sd_configs:
      - role: node

    # Exclude a set of high cardinality metrics that can contribute to
    # significant memory use in large clusters. These can be selectively
    # enabled as necessary for medium or small clusters.
    metric_relabel_configs:
      - source_labels: [__name__]
        action: drop
        regex: 'container_(cpu_user_seconds_total|cpu_cfs_periods_total|memory_usage_bytes|memory_swap|memory_working_set_bytes|memory_cache|last_seen|fs_(read_seconds_total|write_seconds_total|sector_(.*)|io_(.*)|reads_merged_total|writes_merged_total)|tasks_state|memory_failcnt|memory_failures_total|spec_memory_swap_limit_bytes|fs_(.*)_bytes_total|spec_(.*))'

    # Copy every Kubernetes node label onto the target as a Prometheus label.
    relabel_configs:
      - action: labelmap
        regex: '__meta_kubernetes_node_label_(.+)'

  # Scrape config for service endpoints.
  #
  # The relabeling allows the actual service scrape endpoint to be configured
  # via the following annotations:
  #
  # * `prometheus.io/scrape`: Only scrape services that have a value of `true`
  # * `prometheus.io/scheme`: If the metrics endpoint is secured then you will
  #   need to set this to `https` & most likely set the `tls_config` of the
  #   scrape config.
  # * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
  # * `prometheus.io/port`: If the metrics are exposed on a different port than
  #   the service then set this appropriately.
  - job_name: 'kubernetes-service-endpoints'

    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      # TODO: this should be per target
      insecure_skip_verify: true

    kubernetes_sd_configs:
      - role: endpoints

    relabel_configs:
      # only scrape infrastructure components
      - source_labels: [__meta_kubernetes_namespace]
        action: keep
        regex: 'default|logging|metrics|kube-.+|openshift|openshift-.+'
      # drop infrastructure components managed by other scrape targets
      - source_labels: [__meta_kubernetes_service_name]
        action: drop
        regex: 'prometheus-node-exporter'
      # only those that have requested scraping
      # (quoted so the pattern is a string, not a YAML boolean)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: 'true'
      # honor the scheme annotation when it is `http` or `https`
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: '(https?)'
      # honor the path annotation when it is non-empty
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: '(.+)'
      # replace the endpoint port with the port annotation when it is set
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: '(.+)(?::\d+);(\d+)'
        replacement: '$1:$2'
      - action: labelmap
        regex: '__meta_kubernetes_service_label_(.+)'
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name

  # Scrape config for node-exporter, which is expected to be running on port
  # 9100.
  - job_name: 'kubernetes-nodes-exporter'

    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt

    kubernetes_sd_configs:
      - role: node

    metric_relabel_configs:
      - source_labels: [__name__]
        action: drop
        regex: 'node_cpu|node_(disk|scrape_collector)_.+'
      # preserve a subset of the network, netstat, vmstat, and filesystem
      # series: rename the ones to keep, drop the rest of each family, then
      # strip the temporary `renamed_` prefix again
      - source_labels: [__name__]
        action: replace
        regex: '(node_(netstat_Ip_.+|vmstat_(nr|thp)_.+|filesystem_(free|size|device_error)|network_(transmit|receive)_(drop|errs)))'
        target_label: __name__
        replacement: 'renamed_$1'
      - source_labels: [__name__]
        action: drop
        regex: 'node_(netstat|vmstat|filesystem|network)_.+'
      - source_labels: [__name__]
        action: replace
        regex: 'renamed_(.+)'
        target_label: __name__
        replacement: '$1'
      # drop any partial expensive series
      - source_labels: [__name__, device]
        action: drop
        regex: 'node_network_.+;veth.+'
      # keep the preserved filesystem series only for the `/` mountpoint
      - source_labels: [__name__, mountpoint]
        action: drop
        regex: 'node_filesystem_(free|size|device_error);([^/].*|/.+)'

    relabel_configs:
      # Rewrite addresses on port 10250 to the node-exporter port 9100.
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
      # NOTE(review): Prometheus removes labels beginning with `__` once target
      # relabeling finishes, so `__instance__` will not appear on scraped
      # series. Confirm whether `instance` was intended here.
      - source_labels: [__meta_kubernetes_node_label_kubernetes_io_hostname]
        target_label: __instance__
      - action: labelmap
        regex: '__meta_kubernetes_node_label_(.+)'

  # Scrape config for the template service broker
  - job_name: 'openshift-template-service-broker'

    scheme: https
    tls_config:
      ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
      server_name: apiserver.openshift-template-service-broker.svc
    bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token

    kubernetes_sd_configs:
      - role: endpoints

    # Keep only the `https` port of the `apiserver` service in the
    # `openshift-template-service-broker` namespace.
    relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: 'openshift-template-service-broker;apiserver;https'

# Send alerts to an Alertmanager listening on localhost:9093 over plain HTTP.
alerting:
  alertmanagers:
    - scheme: http
      static_configs:
        - targets:
            - 'localhost:9093'