corosync  3.1.5.15-9134
wd.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2012 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Author: Angus Salkeld <asalkeld@redhat.com>
7  *
8  * This software licensed under BSD license, the text of which follows:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * - Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  * - Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * - Neither the name of the MontaVista Software, Inc. nor the names of its
19  * contributors may be used to endorse or promote products derived from this
20  * software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <config.h>
36 
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <sys/ioctl.h>
40 #include <linux/types.h>
41 #include <linux/watchdog.h>
42 #include <sys/reboot.h>
43 
44 #include <corosync/corotypes.h>
45 #include <corosync/corodefs.h>
46 #include <corosync/coroapi.h>
47 #include <qb/qblist.h>
48 #include <corosync/logsys.h>
49 #include <corosync/icmap.h>
50 #include "fsm.h"
51 
52 #include "service.h"
53 
/*
 * Coarse status values for a watched resource.
 */
typedef enum {
	WD_RESOURCE_GOOD,
	WD_RESOURCE_FAILED,
	WD_RESOURCE_STATE_UNKNOWN,
	WD_RESOURCE_NOT_MONITORED
} wd_resource_state_t;
60 
61 struct resource {
63  char *recovery;
65  time_t last_updated;
66  struct cs_fsm fsm;
67 
69  uint64_t check_timeout;
71 };
72 
74 
75 /*
76  * Service Interfaces required by service_message_handler struct
77  */
78 static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
79 static int wd_exec_exit_fn (void);
80 static void wd_resource_check_fn (void* resource_ref);
81 
/* Corosync API table; assigned in wd_exec_init_fn and used for every timer call. */
82 static struct corosync_api_v1 *api;
/* Default hardware watchdog timeout, in seconds and in milliseconds. */
83 #define WD_DEFAULT_TIMEOUT_SEC 6
84 #define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
/* Accepted range for a resource's poll_period, in milliseconds. */
85 #define WD_MIN_TIMEOUT_MS 500
86 #define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
/* Watchdog timeout currently in effect, in seconds. */
87 static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
/* Interval between keepalives in milliseconds; kept at half the watchdog timeout. */
88 static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
/* File descriptor of the opened watchdog device, -1 while none is open. */
89 static int dog = -1;
/* Handle of the pending keepalive timer. */
90 static corosync_timer_handle_t wd_timer;
/* Set to 0 by wd_resource_failed to make wd_tickle_fn stop servicing the watchdog. */
91 static int watchdog_ok = 1;
/* Device path read from "resources.watchdog_device" in setup_watchdog. */
92 static char *watchdog_device = NULL;
93 
95  .name = "corosync watchdog service",
96  .id = WD_SERVICE,
97  .priority = 1,
98  .private_data_size = 0,
99  .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
100  .lib_init_fn = NULL,
101  .lib_exit_fn = NULL,
102  .lib_engine = NULL,
103  .lib_engine_count = 0,
104  .exec_engine = NULL,
105  .exec_engine_count = 0,
106  .confchg_fn = NULL,
107  .exec_init_fn = wd_exec_init_fn,
108  .exec_exit_fn = wd_exec_exit_fn,
109  .exec_dump_fn = NULL
110 };
111 
/* List head declared for this module; no use of it appears in the visible listing. */
112 static QB_LIST_DECLARE (confchg_notify);
113 
114 /*
115  * F S M
116  */
/* State-machine action handlers referenced from wd_fsm_table. */
117 static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
118 static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
119 
/* States of the per-resource state machine. */
enum wd_resource_state {
	WD_S_RUNNING,
	WD_S_FAILED,
	WD_S_STOPPED
};
125 
/* Events fed into the per-resource state machine. */
enum wd_resource_event {
	WD_E_FAILURE,
	WD_E_CONFIG_CHANGED
};
130 
/*
 * Printable names for states and events. "stopped" and "failed" are also
 * compared against the resource's "state" key read from icmap.
 */
131 const char * wd_running_str = "running";
132 const char * wd_failed_str = "failed";
133 const char * wd_failure_str = "failure";
134 const char * wd_stopped_str = "stopped";
135 const char * wd_config_changed_str = "config_changed";
136 
/*
 * Transition table shared by every resource state machine.
 * Each row is { state, event, handler, { states ..., -1 } }.
 * Rows with a NULL handler have an empty state list.
 * NOTE(review): the last member presumably lists the states the handler
 * may move to, terminated by -1 - confirm against fsm.h.
 */
137 struct cs_fsm_entry wd_fsm_table[] = {
138  { WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} },
139  { WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} },
140  { WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
141  { WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} },
142  { WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
143  { WD_S_FAILED, WD_E_FAILURE, NULL, {-1} },
144 };
145 
147 {
148  return (&wd_service_engine);
149 }
150 
151 static const char * wd_res_state_to_str(struct cs_fsm* fsm,
152  int32_t state)
153 {
154  switch (state) {
155  case WD_S_STOPPED:
156  return wd_stopped_str;
157  break;
158  case WD_S_RUNNING:
159  return wd_running_str;
160  break;
161  case WD_S_FAILED:
162  return wd_failed_str;
163  break;
164  }
165  return NULL;
166 }
167 
168 static const char * wd_res_event_to_str(struct cs_fsm* fsm,
169  int32_t event)
170 {
171  switch (event) {
172  case WD_E_CONFIG_CHANGED:
173  return wd_config_changed_str;
174  break;
175  case WD_E_FAILURE:
176  return wd_failure_str;
177  break;
178  }
179  return NULL;
180 }
181 
182 static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
183  int32_t next_state, int32_t fsm_event, void *data)
184 {
185  switch (cb_event) {
187  log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
188  fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
190  break;
192  log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
193  fsm->name,
194  fsm->event_to_str(fsm, fsm_event),
195  fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
196  fsm->state_to_str(fsm, next_state));
197  break;
199  log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
200  fsm->name,
201  fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
202  fsm->state_to_str(fsm, next_state),
203  fsm->event_to_str(fsm, fsm_event));
205  break;
206  default:
207  log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!");
209  break;
210  }
211 }
212 
213 /*
214  * returns (CS_TRUE == OK, CS_FALSE == failed)
215  */
/*
 * Decide whether the resource behind `ref` is healthy, based on the
 * "last_updated" and "state" keys below ref->res_path in icmap.
 *
 * CS_FALSE is returned when a key name does not fit in key_name, when
 * either key cannot be read, when the state is "disabled" or "failed",
 * or when the last update is older than 1.5 * ref->check_timeout.
 * CS_TRUE is returned when last_updated is still 0, or when the
 * timestamp is recent enough and the state is not "failed".
 */
216 static int32_t wd_resource_state_is_ok (struct resource *ref)
217 {
218  char* state = NULL;
219  uint64_t last_updated;
220  uint64_t my_time;
221  uint64_t allowed_period;
222  char key_name[ICMAP_KEYNAME_MAXLEN];
223 
224  if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated") >= ICMAP_KEYNAME_MAXLEN) ||
225  (icmap_get_uint64(key_name, &last_updated) != CS_OK)) {
226  /* key does not exist.
227  */
228  return CS_FALSE;
229  }
230 
/* Short-circuit order matters: strcmp only runs once `state` has been fetched. */
231  if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state") >= ICMAP_KEYNAME_MAXLEN) ||
232  (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0)) {
233  /* key does not exist.
234  */
/* `state` is non-NULL here only on the "disabled" path. */
235  if (state != NULL)
236  free(state);
237 
238  return CS_FALSE;
239  }
240 
241  if (last_updated == 0) {
242  /* initial value */
243  free(state);
244  return CS_TRUE;
245  }
246 
/* NOTE(review): timestamps are presumably nanoseconds, given the MILLI_2_NANO_SECONDS scaling below - confirm. */
247  my_time = cs_timestamp_get();
248 
249  /*
250  * Here we check that the monitor has written a timestamp within the poll_period
251  * plus a grace factor of (0.5 * poll_period).
252  */
253  allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
254  if ((last_updated + allowed_period) < my_time) {
/* NOTE(review): the line opening this log call (listing line 255) is absent from this extract. */
256  "last_updated %"PRIu64" ms too late, period:%"PRIu64".",
257  (uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
258  ref->check_timeout);
259  free(state);
260  return CS_FALSE;
261  }
262 
263  if (strcmp (state, wd_failed_str) == 0) {
264  free(state);
265  return CS_FALSE;
266  }
267 
268  free(state);
269  return CS_TRUE;
270 }
271 
/*
 * State-machine handler for WD_E_CONFIG_CHANGED.
 *
 * Re-reads "poll_period", "recovery" and "state" for the resource passed
 * in `data`, then either stops the machine (missing key, or state equal
 * to "stopped") or re-arms the health-check timer and enters WD_S_RUNNING.
 * `fsm` and `event` are not used; the machine is reached through `data`.
 */
272 static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
273 {
274  char *state;
275  uint64_t tmp_value;
276  uint64_t next_timeout;
277  struct resource *ref = (struct resource*)data;
278  char key_name[ICMAP_KEYNAME_MAXLEN];
279 
280  next_timeout = ref->check_timeout;
281 
/*
 * NOTE(review): this lookup passes ref->res_path, not the key_name built
 * on the previous line, so it looks like "poll_period" is never read.
 * The condition is also shaped differently from the other lookups in this
 * function: a truncated key name enters the branch with tmp_value unset.
 */
282  if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period") >= ICMAP_KEYNAME_MAXLEN) ||
283  (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK)) {
284  if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
/* NOTE(review): the line opening this log call (listing line 285) is absent from this extract. */
286  "poll_period changing from:%"PRIu64" to %"PRIu64".",
287  ref->check_timeout, tmp_value);
288  /*
289  * To ease the transition between poll_periods we are going
290  * to make the first timeout the bigger of the new and old value.
291  * This is to give the monitoring system time to adjust.
292  */
293  next_timeout = CS_MAX(tmp_value, ref->check_timeout);
294  ref->check_timeout = tmp_value;
295  } else {
/* NOTE(review): log call head (listing line 296) absent from this extract. */
297  "Could NOT use poll_period:%"PRIu64" ms for resource %s",
298  tmp_value, ref->name);
299  }
300  }
301 
/* NOTE(review): ref->recovery is overwritten without freeing the previous string - possible leak, confirm icmap_get_string ownership. */
302  if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery") >= ICMAP_KEYNAME_MAXLEN) ||
303  (icmap_get_string(key_name, &ref->recovery) != CS_OK)) {
304  /* key does not exist.
305  */
/* NOTE(review): log call head (listing line 306) absent from this extract. */
307  "resource %s missing a recovery key.", ref->name);
/* Both early returns leave any pending check_timer armed. */
308  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
309  return;
310  }
311  if ((snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state") >= ICMAP_KEYNAME_MAXLEN) ||
312  (icmap_get_string(key_name, &state) != CS_OK)) {
313  /* key does not exist.
314  */
/* NOTE(review): log call head (listing line 315) absent from this extract. */
316  "resource %s missing a state key.", ref->name);
317  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
318  return;
319  }
/* Drop the pending check so it can be re-armed with next_timeout below. */
320  if (ref->check_timer) {
321  api->timer_delete(ref->check_timer);
322  ref->check_timer = 0;
323  }
324 
325  if (strcmp(wd_stopped_str, state) == 0) {
326  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
327  } else {
328  api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
329  ref, wd_resource_check_fn, &ref->check_timer);
330  cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
331  }
332  free(state);
333 }
334 
335 static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
336 {
337  struct resource* ref = (struct resource*)data;
338 
339  if (ref->check_timer) {
340  api->timer_delete(ref->check_timer);
341  ref->check_timer = 0;
342  }
343 
344  log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
345  ref->recovery, (char*)ref->name);
346  if (strcmp (ref->recovery, "watchdog") == 0 ||
347  strcmp (ref->recovery, "quit") == 0) {
348  watchdog_ok = 0;
349  }
350  else if (strcmp (ref->recovery, "reboot") == 0) {
351  reboot(RB_AUTOBOOT);
352  }
353  else if (strcmp (ref->recovery, "shutdown") == 0) {
354  reboot(RB_POWER_OFF);
355  }
356  cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb);
357 }
358 
/*
 * icmap notification callback for keys of one resource.
 *
 * user_data carries the struct resource; a NULL value is ignored.
 * Only the last dot-separated component of key_name is examined:
 *  - add/modify of anything except "last_updated" and "current" feeds
 *    WD_E_CONFIG_CHANGED into the resource's state machine;
 *  - deletion of "state" cancels the check timer and frees the resource.
 * new_val and old_val are not used.
 */
359 static void wd_key_changed(
360  int32_t event,
361  const char *key_name,
362  struct icmap_notify_value new_val,
363  struct icmap_notify_value old_val,
364  void *user_data)
365 {
366  struct resource* ref = (struct resource*)user_data;
367  char *last_key_part;
368 
369  if (ref == NULL) {
370  return ;
371  }
372 
373  last_key_part = strrchr(key_name, '.');
374  if (last_key_part == NULL) {
375  return ;
376  }
/* Step past the '.' so last_key_part names the leaf key. */
377  last_key_part++;
378 
379  if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
/* These keys are skipped so they do not trigger a reconfiguration. */
380  if (strcmp(last_key_part, "last_updated") == 0 ||
381  strcmp(last_key_part, "current") == 0) {
382  return;
383  }
384 
385  cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb);
386  }
387 
/* The `ref != NULL` test repeats the guard at the top of the function. */
388  if (event == ICMAP_TRACK_DELETE && ref != NULL) {
389  if (strcmp(last_key_part, "state") != 0) {
390  return ;
391  }
392 
/* NOTE(review): log call head (listing line 393) absent from this extract. */
394  "resource \"%s\" deleted from cmap!",
395  ref->name);
396 
397  api->timer_delete(ref->check_timer);
398  ref->check_timer = 0;
/* NOTE(review): listing line 399 is absent; presumably it removes ref->icmap_track - confirm. */
400 
/* NOTE(review): ref->recovery is not freed in the visible lines - possible leak. */
401  free(ref);
402  }
403 }
404 
/*
 * Timer callback performing one health check of a resource.
 *
 * resource_ref is the struct resource registered with the timer. An
 * unhealthy resource gets WD_E_FAILURE and no further check is scheduled
 * here; otherwise this function re-registers itself as timer callback.
 */
405 static void wd_resource_check_fn (void* resource_ref)
406 {
407  struct resource* ref = (struct resource*)resource_ref;
408 
409  if (wd_resource_state_is_ok (ref) == CS_FALSE) {
410  cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb);
411  return;
412  }
/* NOTE(review): the first line of the re-arming timer call (listing line 413) is absent from this extract. */
414  ref, wd_resource_check_fn, &ref->check_timer);
415 }
416 
417 /*
418  * return 0 - fully configured
419  * return -1 - partially configured
420  */
/*
 * Allocate and initialise the monitoring record for one resource.
 *
 * res_path - icmap key prefix of the resource (leaf names are appended
 *            directly, so it is expected to end with '.').
 * res_name - resource name; copied and used as the state machine name.
 *
 * On full success the first health check is scheduled and the machine is
 * put into WD_S_RUNNING. When "recovery" or "state" is missing, -1 is
 * returned and the record stays allocated.
 */
421 static int32_t wd_resource_create (char *res_path, char *res_name)
422 {
423  char *state;
424  uint64_t tmp_value;
/* NOTE(review): the calloc result is used without a NULL check. */
425  struct resource *ref = calloc (1, sizeof (struct resource));
426  char key_name[ICMAP_KEYNAME_MAXLEN];
427 
/* NOTE(review): unbounded strcpy; relies on callers passing strings that fit res_path and name. */
428  strcpy(ref->res_path, res_path);
/* NOTE(review): listing line 429 is absent; presumably it sets the default ref->check_timeout - confirm. */
430  ref->check_timer = 0;
431 
432  strcpy(ref->name, res_name);
433  ref->fsm.name = ref->name;
434  ref->fsm.table = wd_fsm_table;
435  ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
436  ref->fsm.curr_entry = 0;
437  ref->fsm.curr_state = WD_S_STOPPED;
438  ref->fsm.state_to_str = wd_res_state_to_str;
439  ref->fsm.event_to_str = wd_res_event_to_str;
440 
/* Publish the current poll_period when the key is unset; otherwise adopt it if in range. */
441  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
442  if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
443  icmap_set_uint64(key_name, ref->check_timeout);
444  } else {
445  if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
446  ref->check_timeout = tmp_value;
447  } else {
/* NOTE(review): log call head (listing line 448) absent from this extract. */
449  "Could NOT use poll_period:%"PRIu64" ms for resource %s",
450  tmp_value, ref->name);
451  }
452  }
453 
/* NOTE(review): listing lines 454-455 are absent; the remaining arguments register wd_key_changed with `ref` as user data. */
456  wd_key_changed,
457  ref, &ref->icmap_track);
458 
459  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
460  if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
461  /* key does not exist.
462  */
/* NOTE(review): log call head (listing line 463) absent from this extract. */
464  "resource %s missing a recovery key.", ref->name);
465  return -1;
466  }
467  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
468  if (icmap_get_string(key_name, &state) != CS_OK) {
469  /* key does not exist.
470  */
/* NOTE(review): log call head (listing line 471) absent from this extract. */
472  "resource %s missing a state key.", ref->name);
473  return -1;
474  }
475 
/* NOTE(review): `state` is fetched only as an existence check and is never freed below - looks like a leak. */
476  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
477  if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
478  /* key does not exist.
479  */
480  ref->last_updated = 0;
481  } else {
482  ref->last_updated = tmp_value;
483  }
484 
485  /*
486  * delay the first check to give the monitor time to start working.
487  */
488  tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
489  api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
490  ref,
491  wd_resource_check_fn, &ref->check_timer);
492 
493  cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
494  return 0;
495 }
496 
497 
498 static void wd_tickle_fn (void* arg)
499 {
500  ENTER();
501 
502  if (watchdog_ok) {
503  if (dog > 0) {
504  ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
505  }
506  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
507  wd_tickle_fn, &wd_timer);
508  }
509  else {
510  log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
511  }
512 
513 }
514 
515 static void wd_resource_created_cb(
516  int32_t event,
517  const char *key_name,
518  struct icmap_notify_value new_val,
519  struct icmap_notify_value old_val,
520  void *user_data)
521 {
522  char res_name[ICMAP_KEYNAME_MAXLEN];
523  char res_type[ICMAP_KEYNAME_MAXLEN];
524  char tmp_key[ICMAP_KEYNAME_MAXLEN];
525  int res;
526 
527  if (event != ICMAP_TRACK_ADD) {
528  return ;
529  }
530 
531  res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
532  if (res != 3) {
533  return ;
534  }
535 
536  if (strcmp(tmp_key, "state") != 0) {
537  return ;
538  }
539 
540  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
541  wd_resource_create (tmp_key, res_name);
542 }
543 
544 static void wd_scan_resources (void)
545 {
546  int res_count = 0;
547  icmap_track_t icmap_track = NULL;
548  icmap_iter_t iter;
549  const char *key_name;
550  int res;
551  char res_name[ICMAP_KEYNAME_MAXLEN];
552  char res_type[ICMAP_KEYNAME_MAXLEN];
553  char tmp_key[ICMAP_KEYNAME_MAXLEN];
554 
555  ENTER();
556 
557  iter = icmap_iter_init("resources.");
558  while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) {
559  res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
560  if (res != 3) {
561  continue ;
562  }
563 
564  if (strcmp(tmp_key, "state") != 0) {
565  continue ;
566  }
567 
568  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
569  if (wd_resource_create (tmp_key, res_name) == 0) {
570  res_count++;
571  }
572  }
573  icmap_iter_finalize(iter);
574 
575  icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
576  wd_resource_created_cb, NULL, &icmap_track);
577  icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
578  wd_resource_created_cb, NULL, &icmap_track);
579 
580  if (res_count == 0) {
581  log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
582  }
583 }
584 
585 
/*
 * Apply a new hardware watchdog timeout, in seconds.
 *
 * Nothing is done when `new` equals the timeout reported by the device
 * (or 0 when no device is open). Otherwise the value is pushed to the
 * device if it advertises WDIOF_SETTIMEOUT and then read back. When the
 * effective timeout matches `new`, tickle_timeout is set to half of it
 * and the keepalive timer is restarted.
 */
586 static void watchdog_timeout_apply (uint32_t new)
587 {
588  struct watchdog_info ident;
589  uint32_t original_timeout = 0;
590 
591  if (dog > 0) {
592  ioctl(dog, WDIOC_GETTIMEOUT, &original_timeout);
593  }
594 
595  if (new == original_timeout) {
596  return;
597  }
598 
599  watchdog_timeout = new;
600 
601  if (dog > 0) {
/* NOTE(review): the ioctl result is not checked, so ident.options may be read uninitialised on failure. */
602  ioctl(dog, WDIOC_GETSUPPORT, &ident);
603  if (ident.options & WDIOF_SETTIMEOUT) {
604  /* yay! the dog is trained.
605  */
606  ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
607  }
/* Read back what the driver accepted; it overwrites watchdog_timeout. */
608  ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
609  }
610 
611  if (watchdog_timeout == new) {
612  tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;
613 
614  /* reset the tickle timer in case it was reduced.
615  */
616  api->timer_delete (wd_timer);
617  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
618  wd_tickle_fn, &wd_timer);
619 
620  log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout);
621  log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
622  } else {
/* NOTE(review): log call head (listing line 623) absent from this extract. */
624  "Could not change the Watchdog timeout from %d to %d seconds",
625  original_timeout, new);
626  }
627 
628 }
629 
630 static int setup_watchdog(void)
631 {
632  struct watchdog_info ident;
633  char *str;
634 
635  ENTER();
636 
637  if (icmap_get_string("resources.watchdog_device", &str) == CS_OK) {
638  if (str[0] == 0 || strcmp (str, "off") == 0) {
639  log_printf (LOGSYS_LEVEL_WARNING, "Watchdog disabled by configuration");
640  free(str);
641  dog = -1;
642  return -1;
643  } else {
644  watchdog_device = str;
645  }
646  } else {
647  log_printf (LOGSYS_LEVEL_WARNING, "Watchdog not enabled by configuration");
648  dog = -1;
649  return -1;
650  }
651 
652  if (access (watchdog_device, W_OK) != 0) {
653  log_printf (LOGSYS_LEVEL_WARNING, "No watchdog %s, try modprobe <a watchdog>", watchdog_device);
654  dog = -1;
655  return -1;
656  }
657 
658  /* here goes, lets hope they have "Magic Close"
659  */
660  dog = open(watchdog_device, O_WRONLY);
661 
662  if (dog == -1) {
663  log_printf (LOGSYS_LEVEL_WARNING, "Watchdog %s exists but couldn't be opened.", watchdog_device);
664  dog = -1;
665  return -1;
666  }
667 
668  /* Right we have the dog.
669  * Lets see what breed it is.
670  */
671 
672  ioctl(dog, WDIOC_GETSUPPORT, &ident);
673  log_printf (LOGSYS_LEVEL_INFO, "Watchdog %s is now being tickled by corosync.", watchdog_device);
674  log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
675 
676  watchdog_timeout_apply (watchdog_timeout);
677 
678  ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
679 
680  return 0;
681 }
682 
/*
 * icmap notification callback for "resources.watchdog_timeout".
 *
 * A readable value in the range 2..120 seconds is applied. Anything else
 * is reported and the key is written back with the timeout currently in
 * effect. None of the callback arguments are used; the key is re-read.
 */
683 static void wd_top_level_key_changed(
684  int32_t event,
685  const char *key_name,
686  struct icmap_notify_value new_val,
687  struct icmap_notify_value old_val,
688  void *user_data)
689 {
690  uint32_t tmp_value_32;
691 
692  ENTER();
693 
694  if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) == CS_OK) {
695  if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
696  watchdog_timeout_apply (tmp_value_32);
697  return;
698  }
699  }
700 
/* NOTE(review): log call head (listing line 701) absent from this extract. */
702  "Set watchdog_timeout is out of range (2..120).");
/* Restore the key to the value that is actually in use. */
703  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
704 }
705 
/*
 * Establish the initial watchdog timeout from icmap and start tracking
 * later modifications of "resources.watchdog_timeout".
 *
 * A missing or out-of-range (not 2..120) value falls back to
 * WD_DEFAULT_TIMEOUT_SEC, and the key is rewritten with the timeout in
 * effect afterwards.
 */
706 static void watchdog_timeout_get_initial (void)
707 {
708  uint32_t tmp_value_32;
709  icmap_track_t icmap_track = NULL;
710 
711  ENTER();
712 
713  if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
714  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
715 
716  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
717  }
718  else {
719  if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
720  watchdog_timeout_apply (tmp_value_32);
721  }
722  else {
/* NOTE(review): the heads of both log calls (listing lines 723 and 725) are absent from this extract. */
724  "Set watchdog_timeout is out of range (2..120).");
726  "use default value %d seconds.", WD_DEFAULT_TIMEOUT_SEC);
727  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
728  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
729  }
730  }
731 
/* The tracking handle is stored in a local only and is not kept after return. */
732  icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
733  wd_top_level_key_changed, NULL, &icmap_track);
734 
735 }
736 
737 static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
738 {
739 
740  ENTER();
741 
742  api = corosync_api;
743 
744  watchdog_timeout_get_initial();
745 
746  setup_watchdog();
747 
748  wd_scan_resources();
749 
750  return NULL;
751 }
752 
753 static int wd_exec_exit_fn (void)
754 {
755  char magic = 'V';
756  ENTER();
757 
758  if (dog > 0) {
759  log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
760  if (write (dog, &magic, 1) == -1) {
761  log_printf (LOGSYS_LEVEL_ERROR, "failed to write %c to dog(%d).", magic, dog);
762  }
763  }
764  return 0;
765 }
766 
767 
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
@ CS_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:153
#define MILLI_2_NANO_SECONDS
Definition: coroapi.h:105
@ WD_SERVICE
Definition: corodefs.h:51
#define CS_TIME_MS_IN_SEC
Definition: corotypes.h:133
#define CS_MAX(x, y)
Definition: corotypes.h:57
#define CS_FALSE
Definition: corotypes.h:53
#define CS_TRUE
Definition: corotypes.h:54
#define CS_MAX_NAME_LENGTH
Definition: corotypes.h:55
@ CS_OK
Definition: corotypes.h:99
QB_LIST_DECLARE(cpg_pd_list_head)
#define corosync_exit_error(err)
Definition: exec/util.h:72
@ COROSYNC_DONE_FATAL_ERR
Definition: exec/util.h:55
#define CS_FSM_CB_EVENT_PROCESS_NF
Definition: fsm.h:54
#define CS_FSM_CB_EVENT_STATE_SET
Definition: fsm.h:55
#define CS_FSM_CB_EVENT_STATE_SET_NF
Definition: fsm.h:56
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:892
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1159
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem....
Definition: icmap.h:85
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1089
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1095
qb_map_iter_t * icmap_iter_t
Iterator type.
Definition: icmap.h:123
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1116
cs_error_t icmap_track_delete(icmap_track_t icmap_track)
Remove previously added track.
Definition: icmap.c:1204
cs_error_t icmap_set_uint64(const char *key_name, uint64_t value)
Definition: icmap.c:609
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:597
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
cs_error_t icmap_get_uint64(const char *key_name, uint64_t *u64)
Definition: icmap.c:904
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:856
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:72
#define log_printf(level, format, args...)
Definition: logsys.h:332
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:75
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:71
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:73
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:76
#define ENTER
Definition: logsys.h:333
#define LOGSYS_LEVEL_ALERT
Definition: logsys.h:70
void * user_data
Definition: sam.c:127
The corosync_api_v1 struct.
Definition: coroapi.h:225
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
The corosync_service_engine struct.
Definition: coroapi.h:490
const char * name
Definition: coroapi.h:491
Definition: fsm.h:58
int32_t curr_state
Definition: fsm.h:59
Definition: fsm.h:65
int32_t curr_entry
Definition: fsm.h:68
int32_t curr_state
Definition: fsm.h:67
cs_fsm_state_to_str_fn state_to_str
Definition: fsm.h:71
size_t entries
Definition: fsm.h:69
const char * name
Definition: fsm.h:66
struct cs_fsm_entry * table
Definition: fsm.h:70
cs_fsm_event_to_str_fn event_to_str
Definition: fsm.h:72
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
Definition: wd.c:61
icmap_track_t icmap_track
Definition: wd.c:70
corosync_timer_handle_t check_timer
Definition: wd.c:68
char name[CS_MAX_NAME_LENGTH]
Definition: wd.c:64
char res_path[ICMAP_KEYNAME_MAXLEN]
Definition: wd.c:62
char * recovery
Definition: wd.c:63
time_t last_updated
Definition: wd.c:65
struct cs_fsm fsm
Definition: wd.c:66
uint64_t check_timeout
Definition: wd.c:69
unsigned short magic
Definition: totem.h:0
#define WD_MIN_TIMEOUT_MS
Definition: wd.c:85
LOGSYS_DECLARE_SUBSYS("WD")
const char * wd_running_str
Definition: wd.c:131
#define WD_DEFAULT_TIMEOUT_SEC
Definition: wd.c:83
struct corosync_service_engine wd_service_engine
Definition: wd.c:94
const char * wd_failure_str
Definition: wd.c:133
struct corosync_service_engine * wd_get_service_engine_ver0(void)
Definition: wd.c:146
wd_resource_state
Definition: wd.c:120
@ WD_S_STOPPED
Definition: wd.c:123
@ WD_S_FAILED
Definition: wd.c:122
@ WD_S_RUNNING
Definition: wd.c:121
struct cs_fsm_entry wd_fsm_table[]
Definition: wd.c:137
const char * wd_config_changed_str
Definition: wd.c:135
wd_resource_state_t
Definition: wd.c:54
@ WD_RESOURCE_GOOD
Definition: wd.c:55
@ WD_RESOURCE_STATE_UNKNOWN
Definition: wd.c:57
@ WD_RESOURCE_FAILED
Definition: wd.c:56
@ WD_RESOURCE_NOT_MONITORED
Definition: wd.c:58
const char * wd_stopped_str
Definition: wd.c:134
#define WD_MAX_TIMEOUT_MS
Definition: wd.c:86
wd_resource_event
Definition: wd.c:126
@ WD_E_FAILURE
Definition: wd.c:127
@ WD_E_CONFIG_CHANGED
Definition: wd.c:128
#define WD_DEFAULT_TIMEOUT_MS
Definition: wd.c:84
const char * wd_failed_str
Definition: wd.c:132