11# (C) Datadog, Inc. 2018-present
22# All rights reserved
33# Licensed under a 3-clause BSD style license (see LICENSE)
4- import requests
54from six .moves .urllib .parse import urlparse
65
76from datadog_checks .base import ConfigurationError , OpenMetricsBaseCheck , is_affirmative
8- from datadog_checks .base .errors import CheckException
97
108from .metrics import METRIC_MAP
119
@@ -65,25 +63,13 @@ class Etcd(OpenMetricsBaseCheck):
6563
6664 def __init__ (self , name , init_config , instances ):
6765
68- instance = instances [0 ]
69- if is_affirmative (instance .get ('use_preview' , True )):
70- self .HTTP_CONFIG_REMAPPER = {
71- 'ssl_cert' : {'name' : 'tls_cert' },
72- 'ssl_private_key' : {'name' : 'tls_private_key' },
73- 'ssl_ca_cert' : {'name' : 'tls_ca_cert' },
74- 'ssl_verify' : {'name' : 'tls_verify' },
75- 'prometheus_timeout' : {'name' : 'timeout' },
76- }
77- else :
78- # For legacy check ensure prometheus_url is set so
79- # OpenMetricsBaseCheck instantiation succeeds
80- instance .setdefault ('prometheus_url' , '' )
81- self .HTTP_CONFIG_REMAPPER = {
82- 'ssl_keyfile' : {'name' : 'tls_private_key' },
83- 'ssl_certfile' : {'name' : 'tls_cert' },
84- 'ssl_cert_validation' : {'name' : 'tls_verify' },
85- 'ssl_ca_certs' : {'name' : 'tls_ca_cert' },
86- }
66+ self .HTTP_CONFIG_REMAPPER = {
67+ 'ssl_cert' : {'name' : 'tls_cert' },
68+ 'ssl_private_key' : {'name' : 'tls_private_key' },
69+ 'ssl_ca_cert' : {'name' : 'tls_ca_cert' },
70+ 'ssl_verify' : {'name' : 'tls_verify' },
71+ 'prometheus_timeout' : {'name' : 'timeout' },
72+ }
8773
8874 super (Etcd , self ).__init__ (
8975 name ,
@@ -103,11 +89,26 @@ def __init__(self, name, init_config, instances):
10389 )
10490
def check(self, _):
    """Validate the scraper configuration, refresh the leader tag, and scrape.

    The positional argument is ignored; the configuration is always read
    from ``self.instance``.

    Raises:
        ConfigurationError: if the scraper configuration has no
            ``prometheus_url`` key, or if its ``metrics_mapper`` is
            missing or empty.
    """
    config = self.get_scraper_config(self.instance)

    if 'prometheus_url' not in config:
        raise ConfigurationError('You have to define at least one `prometheus_url`.')

    if not config.get('metrics_mapper'):
        message = 'You have to collect at least one metric from the endpoint `{}`.'.format(
            config['prometheus_url']
        )
        raise ConfigurationError(message)

    # Tags are rebuilt from scratch on every run; the leader tag is opt-out.
    leader_tags = []
    wants_leader_tag = is_affirmative(self.instance.get('leader_tag', True))
    if wants_leader_tag:
        self.add_leader_state_tag(config, leader_tags)

    # Slice assignment replaces the contents of the existing list in place
    # instead of rebinding the `_metric_tags` key to a new list object.
    config['_metric_tags'][:] = leader_tags

    self.process(config)
111112
112113 def access_api (self , scraper_config , path , data = '{}' ):
113114 url = urlparse (scraper_config ['prometheus_url' ])
@@ -136,197 +137,11 @@ def add_leader_state_tag(self, scraper_config, tags):
136137 if is_leader is not None :
137138 tags .append ('is_leader:{}' .format ('true' if is_leader else 'false' ))
138139
139- def check_post_v3 (self ):
140- scraper_config = self .get_scraper_config (self .instance )
141-
142- if 'prometheus_url' not in scraper_config :
143- raise ConfigurationError ('You have to define at least one `prometheus_url`.' )
144-
145- if not scraper_config .get ('metrics_mapper' ):
146- raise ConfigurationError (
147- 'You have to collect at least one metric from the endpoint `{}`.' .format (
148- scraper_config ['prometheus_url' ]
149- )
150- )
151-
152- tags = []
153-
154- if is_affirmative (self .instance .get ('leader_tag' , True )):
155- self .add_leader_state_tag (scraper_config , tags )
156-
157- scraper_config ['_metric_tags' ][:] = tags
158-
159- self .process (scraper_config )
160-
def transform_metadata(self, metric, scraper_config):
    """Run the parent class's metadata transform, then submit the metric as `server.version`."""
    parent = super(Etcd, self)
    parent.transform_metadata(metric, scraper_config)

    # Backward compatibility: `etcd.server.version` is still submitted as a metric.
    self.submit_openmetric('server.version', metric, scraper_config)
166145
167- def check_pre_v3 (self ):
168- if 'url' not in self .instance :
169- raise ConfigurationError ('etcd instance missing "url" value.' )
170-
171- # Load values from the instance config
172- url = self .instance ['url' ]
173- instance_tags = self .instance .get ('tags' , [])
174-
175- # Get a copy of tags for the CRIT statuses
176- critical_tags = list (instance_tags )
177-
178- # Append the instance's URL in case there are more than one, that
179- # way they can tell the difference!
180- instance_tags .append ('url:{}' .format (url ))
181- is_leader = False
182-
183- # Gather self health status
184- sc_state = self .UNKNOWN
185- health_status = self ._get_health_status (url )
186- if health_status is not None :
187- sc_state = self .OK if self ._is_healthy (health_status ) else self .CRITICAL
188- self .service_check (self .HEALTH_SERVICE_CHECK_NAME , sc_state , tags = instance_tags )
189-
190- # Gather self metrics
191- self_response = self ._get_self_metrics (url , critical_tags )
192- if self_response is not None :
193- if self_response ['state' ] == 'StateLeader' :
194- is_leader = True
195- instance_tags .append ('etcd_state:leader' )
196- gauges = self .LEADER_GAUGES
197- else :
198- instance_tags .append ('etcd_state:follower' )
199- gauges = self .FOLLOWER_GAUGES
200-
201- for key in self .SELF_RATES :
202- if key in self_response :
203- self .rate (self .SELF_RATES [key ], self_response [key ], tags = instance_tags )
204- else :
205- self .log .warning ('Missing key %s in stats.' , key )
206-
207- for key in gauges :
208- if key in self_response :
209- self .gauge (gauges [key ], self_response [key ], tags = instance_tags )
210- else :
211- self .log .warning ('Missing key %s in stats.' , key )
212-
213- # Gather store metrics
214- store_response = self ._get_store_metrics (url , critical_tags )
215- if store_response is not None :
216- for key in self .STORE_RATES :
217- if key in store_response :
218- self .rate (self .STORE_RATES [key ], store_response [key ], tags = instance_tags )
219- else :
220- self .log .warning ('Missing key %s in stats.' , key )
221-
222- for key in self .STORE_GAUGES :
223- if key in store_response :
224- self .gauge (self .STORE_GAUGES [key ], store_response [key ], tags = instance_tags )
225- else :
226- self .log .warning ('Missing key %s in stats.' , key )
227-
228- # Gather leader metrics
229- if is_leader :
230- leader_response = self ._get_leader_metrics (url , critical_tags )
231- if leader_response is not None and len (leader_response .get ("followers" , {})) > 0 :
232- # Get the followers
233- followers = leader_response .get ("followers" )
234- for fol in followers :
235- # counts
236- for key in self .LEADER_COUNTS :
237- self .rate (
238- self .LEADER_COUNTS [key ],
239- followers [fol ].get ("counts" ).get (key ),
240- tags = instance_tags + ['follower:{}' .format (fol )],
241- )
242- # latency
243- for key in self .LEADER_LATENCY :
244- self .gauge (
245- self .LEADER_LATENCY [key ],
246- followers [fol ].get ("latency" ).get (key ),
247- tags = instance_tags + ['follower:{}' .format (fol )],
248- )
249-
250- # Service check
251- if self_response is not None and store_response is not None :
252- self .service_check (self .SERVICE_CHECK_NAME , self .OK , tags = instance_tags )
253-
254- self ._collect_metadata (url , critical_tags )
255-
256- def _get_health_status (self , url ):
257- """
258- Don't send the "can connect" service check if we have troubles getting
259- the health status
260- """
261- try :
262- r = self ._perform_request (url , "/health" )
263- # we don't use get() here so we can report a KeyError
264- return r .json ()[self .HEALTH_KEY ]
265- except Exception as e :
266- self .log .debug ("Can't determine health status: %s" , e )
267-
268- def _get_self_metrics (self , url , tags ):
269- return self ._get_json (url , "/v2/stats/self" , tags )
270-
271- def _get_store_metrics (self , url , tags ):
272- return self ._get_json (url , "/v2/stats/store" , tags )
273-
274- def _get_leader_metrics (self , url , tags ):
275- return self ._get_json (url , "/v2/stats/leader" , tags )
276-
277146 def _perform_request (self , url , path ):
278147 return self .http .get (url + path )
279-
280- def _get_json (self , url , path , tags ):
281- try :
282- r = self ._perform_request (url , path )
283- except requests .exceptions .Timeout :
284- self .service_check (
285- self .SERVICE_CHECK_NAME ,
286- self .CRITICAL ,
287- message = 'Timeout when hitting {}' .format (url ),
288- tags = tags + ['url:{}' .format (url )],
289- )
290- raise
291- except Exception as e :
292- self .service_check (
293- self .SERVICE_CHECK_NAME ,
294- self .CRITICAL ,
295- message = 'Error hitting {}. Error: {}' .format (url , str (e )),
296- tags = tags + ['url:{}' .format (url )],
297- )
298- raise
299-
300- if r .status_code != 200 :
301- self .service_check (
302- self .SERVICE_CHECK_NAME ,
303- self .CRITICAL ,
304- message = 'Got {} when hitting {}' .format (r .status_code , url ),
305- tags = tags + ['url:{}' .format (url )],
306- )
307- raise CheckException ('Http status code {} on url {}' .format (r .status_code , url ))
308-
309- return r .json ()
310-
311- @classmethod
312- def _is_healthy (cls , status ):
313- """
314- Version of etcd prior to 3.3 return this payload when you hit /health:
315- {"health": "true"}
316-
317- which is wrong since the value is a `bool` on etcd.
318-
319- Version 3.3 fixed this issue in https://github.com/coreos/etcd/pull/8312
320- but we need to support both.
321- """
322- if isinstance (status , bool ):
323- return status
324-
325- return status == "true"
326-
327- def _collect_metadata (self , url , tags ):
328- resp = self ._get_json (url , "/version" , tags )
329- server_version = resp .get ('etcdserver' )
330- self .log .debug ("Agent version is `%s`" , server_version )
331- if server_version :
332- self .set_metadata ('version' , server_version )
0 commit comments