corosync  2.3.6
exec/votequorum.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009-2015 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Authors: Christine Caulfield (ccaulfie@redhat.com)
7  * Fabio M. Di Nitto (fdinitto@redhat.com)
8  *
9  * This software licensed under BSD license, the text of which follows:
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * - Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  * - Redistributions in binary form must reproduce the above copyright notice,
17  * this list of conditions and the following disclaimer in the documentation
18  * and/or other materials provided with the distribution.
19  * - Neither the name of the MontaVista Software, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived from this
21  * software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
33  * THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include <config.h>
37 
38 #include <sys/types.h>
39 #include <sys/stat.h>
40 #include <fcntl.h>
41 #include <stdint.h>
42 #include <unistd.h>
43 
44 #include <qb/qbipc_common.h>
45 
46 #include "quorum.h"
47 #include <corosync/corodefs.h>
48 #include <corosync/list.h>
49 #include <corosync/logsys.h>
50 #include <corosync/coroapi.h>
51 #include <corosync/icmap.h>
52 #include <corosync/votequorum.h>
54 
55 #include "service.h"
56 #include "util.h"
57 
58 LOGSYS_DECLARE_SUBSYS ("VOTEQ");
59 
60 /*
61  * interface with corosync
62  */
63 
64 static struct corosync_api_v1 *corosync_api;
65 
66 /*
67  * votequorum global config vars
68  */
69 
70 
71 static char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN];
72 static struct cluster_node *qdevice = NULL;
73 static unsigned int qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
74 static unsigned int qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
75 static uint8_t qdevice_can_operate = 1;
76 static void *qdevice_reg_conn = NULL;
77 static uint8_t qdevice_master_wins = 0;
78 
79 static uint8_t two_node = 0;
80 
81 static uint8_t wait_for_all = 0;
82 static uint8_t wait_for_all_status = 0;
83 
84 static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE;
85 static int lowest_node_id = -1;
86 static int highest_node_id = -1;
87 
88 #define DEFAULT_LMS_WIN 10000
89 static uint8_t last_man_standing = 0;
90 static uint32_t last_man_standing_window = DEFAULT_LMS_WIN;
91 
92 static uint8_t allow_downscale = 0;
93 static uint32_t ev_barrier = 0;
94 
95 static uint8_t ev_tracking = 0;
96 static uint32_t ev_tracking_barrier = 0;
97 static int ev_tracking_fd = -1;
98 
99 /*
100  * votequorum_exec defines/structs/forward definitions
101  */
104  struct qb_ipc_request_header header __attribute__((aligned(8)));
105  uint32_t nodeid;
106  uint32_t votes;
107  uint32_t expected_votes;
108  uint32_t flags;
109 } __attribute__((packed));
110 
112  struct qb_ipc_request_header header __attribute__((aligned(8)));
113  uint32_t nodeid;
114  uint32_t value;
115  uint8_t param;
116  uint8_t _pad0;
117  uint8_t _pad1;
118  uint8_t _pad2;
119 } __attribute__((packed));
120 
122  struct qb_ipc_request_header header __attribute__((aligned(8)));
123  uint32_t operation;
125 } __attribute__((packed));
126 
128  struct qb_ipc_request_header header __attribute__((aligned(8)));
131 } __attribute__((packed));
132 
133 /*
134  * votequorum_exec onwire version (via totem)
135  */
136 
137 #include "votequorum.h"
138 
139 /*
140  * votequorum_exec onwire messages (via totem)
141  */
142 
143 #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
144 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
145 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
146 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
147 
148 static void votequorum_exec_send_expectedvotes_notification(void);
149 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context);
150 
151 #define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
152 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
153 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3
154 
155 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value);
156 
157 /*
158  * used by req_exec_quorum_qdevice_reg
159  */
160 #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
161 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
162 
163 /*
164  * votequorum internal node status/view
165  */
166 
167 #define NODE_FLAGS_QUORATE 1
168 #define NODE_FLAGS_LEAVING 2
169 #define NODE_FLAGS_WFASTATUS 4
170 #define NODE_FLAGS_FIRST 8
171 #define NODE_FLAGS_QDEVICE_REGISTERED 16
172 #define NODE_FLAGS_QDEVICE_ALIVE 32
173 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64
174 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128
175 
176 typedef enum {
180 } nodestate_t;
181 
182 struct cluster_node {
183  int node_id;
184  nodestate_t state;
185  uint32_t votes;
186  uint32_t expected_votes;
187  uint32_t flags;
188  struct list_head list;
189 };
190 
191 /*
192  * votequorum internal quorum status
193  */
194 
195 static uint8_t quorum;
196 static uint8_t cluster_is_quorate;
197 
198 /*
199  * votequorum membership data
200  */
201 
202 static struct cluster_node *us;
203 static struct list_head cluster_members_list;
204 static unsigned int quorum_members[PROCESSOR_COUNT_MAX];
205 static unsigned int previous_quorum_members[PROCESSOR_COUNT_MAX];
206 static unsigned int atb_nodelist[PROCESSOR_COUNT_MAX];
207 static int quorum_members_entries = 0;
208 static int previous_quorum_members_entries = 0;
209 static int atb_nodelist_entries = 0;
210 static struct memb_ring_id quorum_ringid;
211 
212 /*
213  * pre allocate all cluster_nodes + one for qdevice
214  */
215 static struct cluster_node cluster_nodes[PROCESSOR_COUNT_MAX+2];
216 static int cluster_nodes_entries = 0;
217 
218 /*
219  * votequorum tracking
220  */
221 struct quorum_pd {
222  unsigned char track_flags;
225  struct list_head list;
226  void *conn;
227 };
228 
229 static struct list_head trackers_list;
230 
231 /*
232  * votequorum timers
233  */
234 
235 static corosync_timer_handle_t qdevice_timer;
236 static int qdevice_timer_set = 0;
237 static corosync_timer_handle_t last_man_standing_timer;
238 static int last_man_standing_timer_set = 0;
239 static int sync_nodeinfo_sent = 0;
240 static int sync_wait_for_poll_or_timeout = 0;
241 
242 /*
243  * Service Interfaces required by service_message_handler struct
244  */
245 
246 static int sync_in_progress = 0;
247 
248 static void votequorum_sync_init (
249  const unsigned int *trans_list,
250  size_t trans_list_entries,
251  const unsigned int *member_list,
252  size_t member_list_entries,
253  const struct memb_ring_id *ring_id);
254 
255 static int votequorum_sync_process (void);
256 static void votequorum_sync_activate (void);
257 static void votequorum_sync_abort (void);
258 
259 static quorum_set_quorate_fn_t quorum_callback;
260 
261 /*
262  * votequorum_exec handler and definitions
263  */
264 
265 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api);
266 static int votequorum_exec_exit_fn (void);
267 static int votequorum_exec_send_nodeinfo(uint32_t nodeid);
268 
269 static void message_handler_req_exec_votequorum_nodeinfo (
270  const void *message,
271  unsigned int nodeid);
272 static void exec_votequorum_nodeinfo_endian_convert (void *message);
273 
274 static void message_handler_req_exec_votequorum_reconfigure (
275  const void *message,
276  unsigned int nodeid);
277 static void exec_votequorum_reconfigure_endian_convert (void *message);
278 
279 static void message_handler_req_exec_votequorum_qdevice_reg (
280  const void *message,
281  unsigned int nodeid);
282 static void exec_votequorum_qdevice_reg_endian_convert (void *message);
283 
284 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
285  const void *message,
286  unsigned int nodeid);
287 static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message);
288 
289 static struct corosync_exec_handler votequorum_exec_engine[] =
290 {
291  { /* 0 */
292  .exec_handler_fn = message_handler_req_exec_votequorum_nodeinfo,
293  .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
294  },
295  { /* 1 */
296  .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
297  .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
298  },
299  { /* 2 */
300  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
301  .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
302  },
303  { /* 3 */
304  .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
305  .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
306  },
307 };
308 
309 /*
310  * Library Handler and Functions Definitions
311  */
312 
313 static int quorum_lib_init_fn (void *conn);
314 
315 static int quorum_lib_exit_fn (void *conn);
316 
317 static void qdevice_timer_fn(void *arg);
318 
319 static void message_handler_req_lib_votequorum_getinfo (void *conn,
320  const void *message);
321 
322 static void message_handler_req_lib_votequorum_setexpected (void *conn,
323  const void *message);
324 
325 static void message_handler_req_lib_votequorum_setvotes (void *conn,
326  const void *message);
327 
328 static void message_handler_req_lib_votequorum_trackstart (void *conn,
329  const void *message);
330 
331 static void message_handler_req_lib_votequorum_trackstop (void *conn,
332  const void *message);
333 
334 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
335  const void *message);
336 
337 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
338  const void *message);
339 
340 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
341  const void *message);
342 
343 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
344  const void *message);
345 
346 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
347  const void *message);
348 
349 static struct corosync_lib_handler quorum_lib_service[] =
350 {
351  { /* 0 */
352  .lib_handler_fn = message_handler_req_lib_votequorum_getinfo,
354  },
355  { /* 1 */
356  .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
358  },
359  { /* 2 */
360  .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
362  },
363  { /* 3 */
364  .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
366  },
367  { /* 4 */
368  .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
370  },
371  { /* 5 */
372  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
374  },
375  { /* 6 */
376  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
378  },
379  { /* 7 */
380  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
382  },
383  { /* 8 */
384  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
386  },
387  { /* 9 */
388  .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
390  }
391 };
392 
393 static struct corosync_service_engine votequorum_service_engine = {
394  .name = "corosync vote quorum service v1.0",
395  .id = VOTEQUORUM_SERVICE,
396  .priority = 2,
397  .private_data_size = sizeof (struct quorum_pd),
398  .allow_inquorate = CS_LIB_ALLOW_INQUORATE,
399  .flow_control = COROSYNC_LIB_FLOW_CONTROL_REQUIRED,
400  .lib_init_fn = quorum_lib_init_fn,
401  .lib_exit_fn = quorum_lib_exit_fn,
402  .lib_engine = quorum_lib_service,
403  .lib_engine_count = sizeof (quorum_lib_service) / sizeof (struct corosync_lib_handler),
404  .exec_init_fn = votequorum_exec_init_fn,
405  .exec_exit_fn = votequorum_exec_exit_fn,
406  .exec_engine = votequorum_exec_engine,
407  .exec_engine_count = sizeof (votequorum_exec_engine) / sizeof (struct corosync_exec_handler),
408  .sync_init = votequorum_sync_init,
409  .sync_process = votequorum_sync_process,
410  .sync_activate = votequorum_sync_activate,
411  .sync_abort = votequorum_sync_abort
412 };
413 
415 {
416  return (&votequorum_service_engine);
417 }
418 
419 static struct default_service votequorum_service[] = {
420  {
421  .name = "corosync_votequorum",
422  .ver = 0,
424  },
425 };
426 
427 /*
428  * common/utility macros/functions
429  */
430 
431 #define max(a,b) (((a) > (b)) ? (a) : (b))
432 
/*
 * Walk a list: v takes the value of each element in turn, starting at
 * head->next and stopping once the walk is back at head.
 *
 * Every use of the arguments is parenthesized so that any pointer
 * expression (not only a plain identifier or &name) can be passed as
 * head or v. Note that head is still evaluated more than once.
 */
#define list_iterate(v, head) \
	for ((v) = (head)->next; (v) != (head); (v) = (v)->next)
435 
436 static void node_add_ordered(struct cluster_node *newnode)
437 {
438  struct cluster_node *node = NULL;
439  struct list_head *tmp;
440  struct list_head *newlist = &newnode->list;
441 
442  ENTER();
443 
444  list_iterate(tmp, &cluster_members_list) {
445  node = list_entry(tmp, struct cluster_node, list);
446  if (newnode->node_id < node->node_id) {
447  break;
448  }
449  }
450 
451  if (!node) {
452  list_add(&newnode->list, &cluster_members_list);
453  } else {
454  newlist->prev = tmp->prev;
455  newlist->next = tmp;
456  tmp->prev->next = newlist;
457  tmp->prev = newlist;
458  }
459 
460  LEAVE();
461 }
462 
463 static struct cluster_node *allocate_node(unsigned int nodeid)
464 {
465  struct cluster_node *cl = NULL;
466  struct list_head *tmp;
467 
468  ENTER();
469 
470  if (cluster_nodes_entries <= PROCESSOR_COUNT_MAX + 1) {
471  cl = (struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
472  cluster_nodes_entries++;
473  } else {
474  list_iterate(tmp, &cluster_members_list) {
475  cl = list_entry(tmp, struct cluster_node, list);
476  if (cl->state == NODESTATE_DEAD) {
477  break;
478  }
479  }
480  /*
481  * this should never happen
482  */
483  if (!cl) {
484  log_printf(LOGSYS_LEVEL_CRIT, "Unable to find memory for node %u data!!", nodeid);
485  goto out;
486  }
487  list_del(tmp);
488  }
489 
490  memset(cl, 0, sizeof(struct cluster_node));
491  cl->node_id = nodeid;
492  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
493  node_add_ordered(cl);
494  }
495 
496 out:
497  LEAVE();
498 
499  return cl;
500 }
501 
502 static struct cluster_node *find_node_by_nodeid(unsigned int nodeid)
503 {
504  struct cluster_node *node;
505  struct list_head *tmp;
506 
507  ENTER();
508 
509  if (nodeid == us->node_id) {
510  LEAVE();
511  return us;
512  }
513 
514  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
515  LEAVE();
516  return qdevice;
517  }
518 
519  list_iterate(tmp, &cluster_members_list) {
520  node = list_entry(tmp, struct cluster_node, list);
521  if (node->node_id == nodeid) {
522  LEAVE();
523  return node;
524  }
525  }
526 
527  LEAVE();
528  return NULL;
529 }
530 
531 static void get_lowest_node_id(void)
532 {
533  struct cluster_node *node = NULL;
534  struct list_head *tmp;
535 
536  ENTER();
537 
538  lowest_node_id = us->node_id;
539 
540  list_iterate(tmp, &cluster_members_list) {
541  node = list_entry(tmp, struct cluster_node, list);
542  if ((node->state == NODESTATE_MEMBER) &&
543  (node->node_id < lowest_node_id)) {
544  lowest_node_id = node->node_id;
545  }
546  }
547  log_printf(LOGSYS_LEVEL_DEBUG, "lowest node id: %d us: %d", lowest_node_id, us->node_id);
548  icmap_set_uint32("runtime.votequorum.lowest_node_id", lowest_node_id);
549 
550  LEAVE();
551 }
552 
553 static void get_highest_node_id(void)
554 {
555  struct cluster_node *node = NULL;
556  struct list_head *tmp;
557 
558  ENTER();
559 
560  highest_node_id = us->node_id;
561 
562  list_iterate(tmp, &cluster_members_list) {
563  node = list_entry(tmp, struct cluster_node, list);
564  if ((node->state == NODESTATE_MEMBER) &&
565  (node->node_id > highest_node_id)) {
566  highest_node_id = node->node_id;
567  }
568  }
569  log_printf(LOGSYS_LEVEL_DEBUG, "highest node id: %d us: %d", highest_node_id, us->node_id);
570  icmap_set_uint32("runtime.votequorum.highest_node_id", highest_node_id);
571 
572  LEAVE();
573 }
574 
575 static int check_low_node_id_partition(void)
576 {
577  struct cluster_node *node = NULL;
578  struct list_head *tmp;
579  int found = 0;
580 
581  ENTER();
582 
583  list_iterate(tmp, &cluster_members_list) {
584  node = list_entry(tmp, struct cluster_node, list);
585  if ((node->state == NODESTATE_MEMBER) &&
586  (node->node_id == lowest_node_id)) {
587  found = 1;
588  }
589  }
590 
591  LEAVE();
592  return found;
593 }
594 
595 static int check_high_node_id_partition(void)
596 {
597  struct cluster_node *node = NULL;
598  struct list_head *tmp;
599  int found = 0;
600 
601  ENTER();
602 
603  list_iterate(tmp, &cluster_members_list) {
604  node = list_entry(tmp, struct cluster_node, list);
605  if ((node->state == NODESTATE_MEMBER) &&
606  (node->node_id == highest_node_id)) {
607  found = 1;
608  }
609  }
610 
611  LEAVE();
612  return found;
613 }
614 
/*
 * Linear search for nodeid in the first 'entries' elements of members.
 *
 * Returns 1 when the id is present, 0 otherwise (including when entries
 * is zero or negative).
 */
static int is_in_nodelist(int nodeid, unsigned int *members, int entries)
{
	int idx = 0;
	int hit = 0;

	ENTER();

	while (idx < entries) {
		if (nodeid == members[idx]) {
			hit = 1;
			break;
		}
		idx++;
	}

	LEAVE();
	return hit;
}
629 
630 /*
631  * The algorithm for a list of tie-breaker nodes is:
632  * travel the list of nodes in the auto_tie_breaker list,
633  * if the node IS in our current partition, check if the
634  * nodes earlier in the atb list are in the 'previous' partition;
635  * If none are found then we are safe to be quorate, if any are
636  * then we cannot be as we don't know if that node is up or down.
637  * If we don't have a node in the current list we are NOT quorate.
638  * Obviously if we find the first node in the atb list in our
639  * partition then we are quorate.
640  *
641  * Special cases lowest nodeid, and highest nodeid are handled separately.
642  */
643 static int check_auto_tie_breaker(void)
644 {
645  int i, j;
646  int res;
647  ENTER();
648 
649  if (auto_tie_breaker == ATB_LOWEST) {
650  res = check_low_node_id_partition();
651  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LOWEST decision: %d", res);
652  LEAVE();
653  return res;
654  }
655  if (auto_tie_breaker == ATB_HIGHEST) {
656  res = check_high_node_id_partition();
657  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_HIGHEST decision: %d", res);
658  LEAVE();
659  return res;
660  }
661 
662  /* Assume ATB_LIST, we should never be called for ATB_NONE */
663  for (i=0; i < atb_nodelist_entries; i++) {
664  if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
665  /*
666  * Node is in our partition, if any of its predecessors are
667  * in the previous quorum partition then it might be in the
668  * 'other half' (as we've got this far without seeing it here)
669  * and so we can't be quorate.
670  */
671  for (j=0; j<i; j++) {
672  if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
673  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node %d in previous partition but not here, quorum denied", atb_nodelist[j]);
674  LEAVE();
675  return 0;
676  }
677  }
678 
679  /*
680  * None of the other list nodes were in the previous partition, if there
681  * are enough votes, we can be quorate
682  */
683  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found node %d in current partition, we can be quorate", atb_nodelist[i]);
684  LEAVE();
685  return 1;
686  }
687  }
688  log_printf(LOGSYS_LEVEL_DEBUG, "ATB_LIST found no list nodes in current partition, we cannot be quorate");
689  LEAVE();
690  return 0;
691 }
692 
693 /*
694  * atb_string can be either:
695  * 'lowest'
696  * 'highest'
697  * a list of nodeids
698  */
699 static void parse_atb_string(char *atb_string)
700 {
701  char *ptr;
702  long num;
703 
704  ENTER();
705  auto_tie_breaker = ATB_NONE;
706 
707  if (!strcmp(atb_string, "lowest"))
708  auto_tie_breaker = ATB_LOWEST;
709 
710  if (!strcmp(atb_string, "highest"))
711  auto_tie_breaker = ATB_HIGHEST;
712 
713  if (atoi(atb_string)) {
714 
715  atb_nodelist_entries = 0;
716  ptr = atb_string;
717  do {
718  num = strtol(ptr, &ptr, 10);
719  if (num) {
720  log_printf(LOGSYS_LEVEL_DEBUG, "ATB nodelist[%d] = %d", atb_nodelist_entries, num);
721  atb_nodelist[atb_nodelist_entries++] = num;
722  }
723  } while (num);
724 
725  if (atb_nodelist_entries) {
726  auto_tie_breaker = ATB_LIST;
727  }
728  }
729  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
730  log_printf(LOGSYS_LEVEL_DEBUG, "ATB type = %d", auto_tie_breaker);
731 
732  /* Make sure we got something */
733  if (auto_tie_breaker == ATB_NONE) {
734  log_printf(LOGSYS_LEVEL_WARNING, "auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
735  auto_tie_breaker = ATB_NONE;
736  }
737  LEAVE();
738 }
739 
740 static int check_qdevice_master(void)
741 {
742  struct cluster_node *node = NULL;
743  struct list_head *tmp;
744  int found = 0;
745 
746  ENTER();
747 
748  list_iterate(tmp, &cluster_members_list) {
749  node = list_entry(tmp, struct cluster_node, list);
750  if ((node->state == NODESTATE_MEMBER) &&
753  found = 1;
754  }
755  }
756 
757  LEAVE();
758  return found;
759 }
760 
761 static void decode_flags(uint32_t flags)
762 {
763  ENTER();
764 
766  "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
767  (flags & NODE_FLAGS_QUORATE)?"Yes":"No",
768  (flags & NODE_FLAGS_LEAVING)?"Yes":"No",
769  (flags & NODE_FLAGS_WFASTATUS)?"Yes":"No",
770  (flags & NODE_FLAGS_FIRST)?"Yes":"No",
771  (flags & NODE_FLAGS_QDEVICE_REGISTERED)?"Yes":"No",
772  (flags & NODE_FLAGS_QDEVICE_ALIVE)?"Yes":"No",
773  (flags & NODE_FLAGS_QDEVICE_CAST_VOTE)?"Yes":"No",
774  (flags & NODE_FLAGS_QDEVICE_MASTER_WINS)?"Yes":"No");
775 
776  LEAVE();
777 }
778 
779 /*
780  * load/save are copied almost pristine from totemsrp.c
781  */
782 static int load_ev_tracking_barrier(void)
783 {
784  int res = 0;
785  char filename[PATH_MAX];
786 
787  ENTER();
788 
789  snprintf(filename, sizeof(filename) - 1, "%s/ev_tracking", get_run_dir());
790 
791  ev_tracking_fd = open(filename, O_RDWR, 0700);
792  if (ev_tracking_fd != -1) {
793  res = read (ev_tracking_fd, &ev_tracking_barrier, sizeof(uint32_t));
794  if (res == sizeof (uint32_t)) {
795  LEAVE();
796  return 0;
797  }
798  }
799 
800  ev_tracking_barrier = 0;
801  umask(0);
802  ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
803  if (ev_tracking_fd != -1) {
804  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
805  if ((res == -1) || (res != sizeof (uint32_t))) {
807  "Unable to write to %s", filename);
808  }
809  LEAVE();
810  return 0;
811  }
813  "Unable to create %s file", filename);
814 
815  LEAVE();
816 
817  return -1;
818 }
819 
820 static void update_wait_for_all_status(uint8_t wfa_status)
821 {
822  ENTER();
823 
824  wait_for_all_status = wfa_status;
825  if (wait_for_all_status) {
827  } else {
828  us->flags &= ~NODE_FLAGS_WFASTATUS;
829  }
830  icmap_set_uint8("runtime.votequorum.wait_for_all_status",
831  wait_for_all_status);
832 
833  LEAVE();
834 }
835 
836 static void update_two_node(void)
837 {
838  ENTER();
839 
840  icmap_set_uint8("runtime.votequorum.two_node", two_node);
841 
842  LEAVE();
843 }
844 
845 static void update_ev_barrier(uint32_t expected_votes)
846 {
847  ENTER();
848 
849  ev_barrier = expected_votes;
850  icmap_set_uint32("runtime.votequorum.ev_barrier", ev_barrier);
851 
852  LEAVE();
853 }
854 
855 static void update_qdevice_can_operate(uint8_t status)
856 {
857  ENTER();
858 
859  qdevice_can_operate = status;
860  icmap_set_uint8("runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
861 
862  LEAVE();
863 }
864 
865 static void update_qdevice_master_wins(uint8_t allow)
866 {
867  ENTER();
868 
869  qdevice_master_wins = allow;
870  icmap_set_uint8("runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
871 
872  LEAVE();
873 }
874 
875 static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
876 {
877  int res;
878 
879  ENTER();
880 
881  ev_tracking_barrier = ev_t_barrier;
882  icmap_set_uint32("runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
883 
884  if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
886  "Unable to update ev_tracking_barrier on disk data!!!");
887  LEAVE();
888  return;
889  }
890 
891  res = write (ev_tracking_fd, &ev_tracking_barrier, sizeof (uint32_t));
892  if (res != sizeof (uint32_t)) {
894  "Unable to update ev_tracking_barrier on disk data!!!");
895  }
896 #ifdef HAVE_FDATASYNC
897  fdatasync(ev_tracking_fd);
898 #else
899  fsync(ev_tracking_fd);
900 #endif
901 
902  LEAVE();
903 }
904 
905 /*
906  * quorum calculation core bits
907  */
908 
909 static int calculate_quorum(int allow_decrease, unsigned int max_expected, unsigned int *ret_total_votes)
910 {
911  struct list_head *nodelist;
912  struct cluster_node *node;
913  unsigned int total_votes = 0;
914  unsigned int highest_expected = 0;
915  unsigned int newquorum, q1, q2;
916  unsigned int total_nodes = 0;
917 
918  ENTER();
919 
920  if ((allow_downscale) && (allow_decrease) && (max_expected)) {
921  max_expected = max(ev_barrier, max_expected);
922  }
923 
924  list_iterate(nodelist, &cluster_members_list) {
925  node = list_entry(nodelist, struct cluster_node, list);
926 
927  log_printf(LOGSYS_LEVEL_DEBUG, "node %u state=%d, votes=%u, expected=%u",
928  node->node_id, node->state, node->votes, node->expected_votes);
929 
930  if (node->state == NODESTATE_MEMBER) {
931  if (max_expected) {
932  node->expected_votes = max_expected;
933  } else {
934  highest_expected = max(highest_expected, node->expected_votes);
935  }
936  total_votes += node->votes;
937  total_nodes++;
938  }
939  }
940 
942  log_printf(LOGSYS_LEVEL_DEBUG, "node 0 state=1, votes=%u", qdevice->votes);
943  total_votes += qdevice->votes;
944  total_nodes++;
945  }
946 
947  if (max_expected > 0) {
948  highest_expected = max_expected;
949  }
950 
951  /*
952  * This quorum calculation is taken from the OpenVMS Cluster Systems
953  * manual, but, then, you guessed that didn't you
954  */
955  q1 = (highest_expected + 2) / 2;
956  q2 = (total_votes + 2) / 2;
957  newquorum = max(q1, q2);
958 
959  /*
960  * Normally quorum never decreases but the system administrator can
961  * force it down by setting expected votes to a maximum value
962  */
963  if (!allow_decrease) {
964  newquorum = max(quorum, newquorum);
965  }
966 
967  /*
968  * The special two_node mode allows each of the two nodes to retain
969  * quorum if the other fails. Only one of the two should live past
970  * fencing (as both nodes try to fence each other in split-brain.)
971  * Also: if there are more than two nodes, force us inquorate to avoid
972  * any damage or confusion.
973  */
974  if (two_node && total_nodes <= 2) {
975  newquorum = 1;
976  }
977 
978  if (ret_total_votes) {
979  *ret_total_votes = total_votes;
980  }
981 
982  LEAVE();
983  return newquorum;
984 }
985 
986 static void are_we_quorate(unsigned int total_votes)
987 {
988  int quorate;
989  int quorum_change = 0;
990 
991  ENTER();
992 
993  /*
994  * wait for all nodes to show up before granting quorum
995  */
996 
997  if ((wait_for_all) && (wait_for_all_status)) {
998  if (total_votes != us->expected_votes) {
1000  "Waiting for all cluster members. "
1001  "Current votes: %d expected_votes: %d",
1002  total_votes, us->expected_votes);
1003  cluster_is_quorate = 0;
1004  return;
1005  }
1006  update_wait_for_all_status(0);
1007  }
1008 
1009  if (quorum > total_votes) {
1010  quorate = 0;
1011  } else {
1012  quorate = 1;
1013  get_lowest_node_id();
1014  get_highest_node_id();
1015  }
1016 
1017  if ((auto_tie_breaker != ATB_NONE) &&
1018  /* Must be a half (or half-1) split */
1019  (total_votes == (us->expected_votes / 2)) &&
1020  /* If the 'other' partition in a split might have quorum then we can't run ATB */
1021  (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1022  (check_auto_tie_breaker() == 1)) {
1023  quorate = 1;
1024  }
1025 
1026  if ((qdevice_master_wins) &&
1027  (!quorate) &&
1028  (check_qdevice_master() == 1)) {
1029  log_printf(LOGSYS_LEVEL_DEBUG, "node is quorate as part of master_wins partition");
1030  quorate = 1;
1031  }
1032 
1033  if (cluster_is_quorate && !quorate) {
1034  quorum_change = 1;
1035  log_printf(LOGSYS_LEVEL_DEBUG, "quorum lost, blocking activity");
1036  }
1037  if (!cluster_is_quorate && quorate) {
1038  quorum_change = 1;
1039  log_printf(LOGSYS_LEVEL_DEBUG, "quorum regained, resuming activity");
1040  }
1041 
1042  cluster_is_quorate = quorate;
1043  if (cluster_is_quorate) {
1044  us->flags |= NODE_FLAGS_QUORATE;
1045  } else {
1046  us->flags &= ~NODE_FLAGS_QUORATE;
1047  }
1048 
1049  if (wait_for_all) {
1050  if (quorate) {
1051  update_wait_for_all_status(0);
1052  } else {
1053  update_wait_for_all_status(1);
1054  }
1055  }
1056 
1057  if ((quorum_change) &&
1058  (sync_in_progress == 0)) {
1059  quorum_callback(quorum_members, quorum_members_entries,
1060  cluster_is_quorate, &quorum_ringid);
1061  votequorum_exec_send_quorum_notification(NULL, 0L);
1062  }
1063 
1064  LEAVE();
1065 }
1066 
1067 static void get_total_votes(unsigned int *totalvotes, unsigned int *current_members)
1068 {
1069  unsigned int total_votes = 0;
1070  unsigned int cluster_members = 0;
1071  struct list_head *nodelist;
1072  struct cluster_node *node;
1073 
1074  ENTER();
1075 
1076  list_iterate(nodelist, &cluster_members_list) {
1077  node = list_entry(nodelist, struct cluster_node, list);
1078  if (node->state == NODESTATE_MEMBER) {
1079  cluster_members++;
1080  total_votes += node->votes;
1081  }
1082  }
1083 
1084  if (qdevice->votes) {
1085  total_votes += qdevice->votes;
1086  cluster_members++;
1087  }
1088 
1089  *totalvotes = total_votes;
1090  *current_members = cluster_members;
1091 
1092  LEAVE();
1093 }
1094 
1095 /*
1096  * Recalculate cluster quorum, set quorate and notify changes
1097  */
1098 static void recalculate_quorum(int allow_decrease, int by_current_nodes)
1099 {
1100  unsigned int total_votes = 0;
1101  unsigned int cluster_members = 0;
1102 
1103  ENTER();
1104 
1105  get_total_votes(&total_votes, &cluster_members);
1106 
1107  if (!by_current_nodes) {
1108  cluster_members = 0;
1109  }
1110 
1111  /*
1112  * Keep expected_votes at the highest number of votes in the cluster
1113  */
1114  log_printf(LOGSYS_LEVEL_DEBUG, "total_votes=%d, expected_votes=%d", total_votes, us->expected_votes);
1115  if (total_votes > us->expected_votes) {
1116  us->expected_votes = total_votes;
1117  votequorum_exec_send_expectedvotes_notification();
1118  }
1119 
1120  if ((ev_tracking) &&
1121  (us->expected_votes > ev_tracking_barrier)) {
1122  update_ev_tracking_barrier(us->expected_votes);
1123  }
1124 
1125  quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1126  are_we_quorate(total_votes);
1127 
1128  LEAVE();
1129 }
1130 
1131 /*
1132  * configuration bits and pieces
1133  */
1134 
1135 static int votequorum_read_nodelist_configuration(uint32_t *votes,
1136  uint32_t *nodes,
1137  uint32_t *expected_votes)
1138 {
1139  icmap_iter_t iter;
1140  const char *iter_key;
1141  char tmp_key[ICMAP_KEYNAME_MAXLEN];
1142  uint32_t our_pos, node_pos;
1143  uint32_t nodecount = 0;
1144  uint32_t nodelist_expected_votes = 0;
1145  uint32_t node_votes = 0;
1146  int res = 0;
1147 
1148  ENTER();
1149 
1150  if (icmap_get_uint32("nodelist.local_node_pos", &our_pos) != CS_OK) {
1152  "No nodelist defined or our node is not in the nodelist");
1153  return 0;
1154  }
1155 
1156  iter = icmap_iter_init("nodelist.node.");
1157 
1158  while ((iter_key = icmap_iter_next(iter, NULL, NULL)) != NULL) {
1159 
1160  res = sscanf(iter_key, "nodelist.node.%u.%s", &node_pos, tmp_key);
1161  if (res != 2) {
1162  continue;
1163  }
1164 
1165  if (strcmp(tmp_key, "ring0_addr") != 0) {
1166  continue;
1167  }
1168 
1169  nodecount++;
1170 
1171  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "nodelist.node.%u.quorum_votes", node_pos);
1172  if (icmap_get_uint32(tmp_key, &node_votes) != CS_OK) {
1173  node_votes = 1;
1174  }
1175 
1176  nodelist_expected_votes = nodelist_expected_votes + node_votes;
1177 
1178  if (node_pos == our_pos) {
1179  *votes = node_votes;
1180  }
1181  }
1182 
1183  *expected_votes = nodelist_expected_votes;
1184  *nodes = nodecount;
1185 
1186  icmap_iter_finalize(iter);
1187 
1188  LEAVE();
1189 
1190  return 1;
1191 }
1192 
1193 static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1194 {
1195  char *qdevice_model = NULL;
1196  int ret = 0;
1197 
1198  ENTER();
1199 
1200  if (icmap_get_string("quorum.device.model", &qdevice_model) == CS_OK) {
1201  if (strlen(qdevice_model)) {
1202  if (icmap_get_uint32("quorum.device.votes", qdevice_votes) != CS_OK) {
1203  *qdevice_votes = -1;
1204  }
1205  if (icmap_get_uint32("quorum.device.timeout", &qdevice_timeout) != CS_OK) {
1206  qdevice_timeout = VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT;
1207  }
1208  if (icmap_get_uint32("quorum.device.sync_timeout", &qdevice_sync_timeout) != CS_OK) {
1209  qdevice_sync_timeout = VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT;
1210  }
1211  update_qdevice_can_operate(1);
1212  ret = 1;
1213  }
1214 
1215  free(qdevice_model);
1216  }
1217 
1218  LEAVE();
1219 
1220  return ret;
1221 }
1222 
1223 #define VOTEQUORUM_READCONFIG_STARTUP 0
1224 #define VOTEQUORUM_READCONFIG_RUNTIME 1
1225 
1226 static char *votequorum_readconfig(int runtime)
1227 {
1228  uint32_t node_votes = 0, qdevice_votes = 0;
1229  uint32_t node_expected_votes = 0, expected_votes = 0;
1230  uint32_t node_count = 0;
1231  uint8_t atb = 0;
1232  int have_nodelist, have_qdevice;
1233  char *atb_string = NULL;
1234  char *error = NULL;
1235 
1236  ENTER();
1237 
1238  log_printf(LOGSYS_LEVEL_DEBUG, "Reading configuration (runtime: %d)", runtime);
1239 
1240  /*
1241  * Set the few things we re-read at runtime back to their defaults
1242  */
1243  if (runtime) {
1244  two_node = 0;
1245  expected_votes = 0;
1246  }
1247 
1248  /*
1249  * gather basic data here
1250  */
1251  icmap_get_uint32("quorum.expected_votes", &expected_votes);
1252  have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
1253  have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
1254  icmap_get_uint8("quorum.two_node", &two_node);
1255 
1256  /*
1257  * do config verification and enablement
1258  */
1259 
1260  if ((!have_nodelist) && (!expected_votes)) {
1261  if (!runtime) {
1262  error = (char *)"configuration error: nodelist or quorum.expected_votes must be configured!";
1263  } else {
1264  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: nodelist or quorum.expected_votes must be configured!");
1265  log_printf(LOGSYS_LEVEL_CRIT, "will continue with current runtime data");
1266  }
1267  goto out;
1268  }
1269 
1270  /*
1271  * two_node and qdevice are not compatible in the same config.
1272  * try to make an educated guess of what to do
1273  */
1274 
1275  if ((two_node) && (have_qdevice)) {
1276  if (!runtime) {
1277  error = (char *)"configuration error: two_node and quorum device cannot be configured at the same time!";
1278  goto out;
1279  } else {
1280  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: two_node and quorum device cannot be configured at the same time!");
1282  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is registered, disabling two_node");
1283  two_node = 0;
1284  } else {
1285  log_printf(LOGSYS_LEVEL_CRIT, "quorum device is not registered, allowing two_node");
1286  update_qdevice_can_operate(0);
1287  }
1288  }
1289  }
1290 
1291  /*
1292  * Enable special features
1293  */
1294  if (!runtime) {
1295  if (two_node) {
1296  wait_for_all = 1;
1297  }
1298 
1299  icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
1300  icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
1301  icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
1302  icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
1303  icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
1304  icmap_get_uint8("quorum.auto_tie_breaker", &atb);
1305  icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
1306 
1307  /* auto_tie_breaker defaults to LOWEST */
1308  if (atb) {
1309  auto_tie_breaker = ATB_LOWEST;
1310  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1311  }
1312  else {
1313  auto_tie_breaker = ATB_NONE;
1314  if (atb_string) {
1316  "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
1317  }
1318  }
1319 
1320  if (atb && atb_string) {
1321  parse_atb_string(atb_string);
1322  }
1323  free(atb_string);
1324 
1325  /* allow_downscale requires ev_tracking */
1326  if (allow_downscale) {
1327  ev_tracking = 1;
1328  }
1329 
1330  if (ev_tracking) {
1331  if (load_ev_tracking_barrier() < 0) {
1332  LEAVE();
1333  return ((char *)"Unable to load ev_tracking file!");
1334  }
1335  update_ev_tracking_barrier(ev_tracking_barrier);
1336  }
1337 
1338  }
1339 
1340  /* two_node and auto_tie_breaker are not compatible as two_node uses
1341  * a fence race to decide quorum whereas ATB decides based on node id
1342  */
1343  if (two_node && auto_tie_breaker != ATB_NONE) {
1344  log_printf(LOGSYS_LEVEL_CRIT, "two_node and auto_tie_breaker are both specified but are not compatible.");
1345  log_printf(LOGSYS_LEVEL_CRIT, "two_node has been disabled, please fix your corosync.conf");
1346  two_node = 0;
1347  }
1348 
1349  /* If ATB is set and the cluster has an odd number of nodes then wait_for_all needs
1350  * to be set so that an isolated half+1 without the tie breaker node
1351  * does not have quorum on reboot.
1352  */
1353  if ((auto_tie_breaker != ATB_NONE) && (node_expected_votes % 2) &&
1354  (!wait_for_all)) {
1355  if (last_man_standing) {
1356  /* if LMS is set too, it's a fatal configuration error. We can't dictate to the user what
1357  * they might want so we'll just quit.
1358  */
1359  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set, the cluster has an odd number of nodes\n");
1360  log_printf(LOGSYS_LEVEL_CRIT, "and last_man_standing is also set. With this situation a better\n");
1361  log_printf(LOGSYS_LEVEL_CRIT, "solution would be to disable LMS, leave ATB enabled, and also\n");
1362  log_printf(LOGSYS_LEVEL_CRIT, "enable wait_for_all (mandatory for ATB in odd-numbered clusters).\n");
1363  log_printf(LOGSYS_LEVEL_CRIT, "Due to this ambiguity, corosync will fail to start. Please fix your corosync.conf\n");
1364  error = (char *)"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
1365  goto out;
1366  }
1367  else {
1368  log_printf(LOGSYS_LEVEL_CRIT, "auto_tie_breaker is set and the cluster has an odd number of nodes.\n");
1369  log_printf(LOGSYS_LEVEL_CRIT, "wait_for_all needs to be set for this configuration but it is missing\n");
1370  log_printf(LOGSYS_LEVEL_CRIT, "Therefore auto_tie_breaker has been disabled. Please fix your corosync.conf\n");
1371  auto_tie_breaker = ATB_NONE;
1372  icmap_set_uint32("runtime.votequorum.atb_type", auto_tie_breaker);
1373  }
1374  }
1375 
1376  /*
1377  * quorum device is not compatible with last_man_standing and auto_tie_breaker
1378  * neither lms or atb can be set at runtime, so there is no need to check for
1379  * runtime incompatibilities, but qdevice can be configured _after_ LMS and ATB have
1380  * been enabled at startup.
1381  */
1382 
1383  if ((have_qdevice) && (last_man_standing)) {
1384  if (!runtime) {
1385  error = (char *)"configuration error: quorum.device is not compatible with last_man_standing";
1386  goto out;
1387  } else {
1388  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with last_man_standing");
1389  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1390  update_qdevice_can_operate(0);
1391  }
1392  }
1393 
1394  if ((have_qdevice) && (auto_tie_breaker != ATB_NONE)) {
1395  if (!runtime) {
1396  error = (char *)"configuration error: quorum.device is not compatible with auto_tie_breaker";
1397  goto out;
1398  } else {
1399  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with auto_tie_breaker");
1400  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1401  update_qdevice_can_operate(0);
1402  }
1403  }
1404 
1405  if ((have_qdevice) && (wait_for_all)) {
1406  if (!runtime) {
1407  error = (char *)"configuration error: quorum.device is not compatible with wait_for_all";
1408  goto out;
1409  } else {
1410  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with wait_for_all");
1411  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1412  update_qdevice_can_operate(0);
1413  }
1414  }
1415 
1416  if ((have_qdevice) && (allow_downscale)) {
1417  if (!runtime) {
1418  error = (char *)"configuration error: quorum.device is not compatible with allow_downscale";
1419  goto out;
1420  } else {
1421  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device is not compatible with allow_downscale");
1422  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1423  update_qdevice_can_operate(0);
1424  }
1425  }
1426 
1427  /*
1428  * if user specifies quorum.expected_votes + quorum.device but NOT the device.votes
1429  * we don't know what the quorum device should vote.
1430  */
1431 
1432  if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
1433  if (!runtime) {
1434  error = (char *)"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
1435  goto out;
1436  } else {
1437  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when quorum.expected_votes is set");
1438  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1439  update_qdevice_can_operate(0);
1440  }
1441  }
1442 
1443  /*
1444  * if user specifies a node list with uneven votes and no device.votes
1445  * we cannot autocalculate the votes
1446  */
1447 
1448  if ((have_qdevice) &&
1449  (qdevice_votes == -1) &&
1450  (have_nodelist) &&
1451  (node_count != node_expected_votes)) {
1452  if (!runtime) {
1453  error = (char *)"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
1454  goto out;
1455  } else {
1456  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes must be specified when not all nodes votes 1");
1457  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1458  update_qdevice_can_operate(0);
1459  }
1460  }
1461 
1462  /*
1463  * validate quorum device votes vs expected_votes
1464  */
1465 
1466  if ((qdevice_votes > 0) && (expected_votes)) {
1467  int delta = expected_votes - qdevice_votes;
1468  if (delta < 2) {
1469  if (!runtime) {
1470  error = (char *)"configuration error: quorum.device.votes is too high or expected_votes is too low";
1471  goto out;
1472  } else {
1473  log_printf(LOGSYS_LEVEL_CRIT, "configuration error: quorum.device.votes is too high or expected_votes is too low");
1474  log_printf(LOGSYS_LEVEL_CRIT, "disabling quorum device operations");
1475  update_qdevice_can_operate(0);
1476  }
1477  }
1478  }
1479 
1480  /*
1481  * automatically calculate device votes and adjust expected_votes from nodelist
1482  */
1483 
1484  if ((have_qdevice) &&
1485  (qdevice_votes == -1) &&
1486  (!expected_votes) &&
1487  (have_nodelist) &&
1488  (node_count == node_expected_votes)) {
1489  qdevice_votes = node_expected_votes - 1;
1490  node_expected_votes = node_expected_votes + qdevice_votes;
1491  }
1492 
1493  /*
1494  * set this node votes and expected_votes
1495  */
1496  log_printf(LOGSYS_LEVEL_DEBUG, "ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
1497 
1498  if (ev_tracking) {
1499  expected_votes = ev_tracking_barrier;
1500  }
1501 
1502  if (have_nodelist) {
1503  us->votes = node_votes;
1504  us->expected_votes = node_expected_votes;
1505  } else {
1506  us->votes = 1;
1507  icmap_get_uint32("quorum.votes", &us->votes);
1508  }
1509 
1510  if (expected_votes) {
1512  }
1513 
1514  /*
1515  * set qdevice votes
1516  */
1517 
1518  if (!have_qdevice) {
1519  qdevice->votes = 0;
1520  }
1521 
1522  if (qdevice_votes != -1) {
1523  qdevice->votes = qdevice_votes;
1524  }
1525 
1526  update_ev_barrier(us->expected_votes);
1527  update_two_node();
1528  if (wait_for_all) {
1529  update_wait_for_all_status(1);
1530  }
1531 
1532 out:
1533  LEAVE();
1534  return error;
1535 }
1536 
1537 static void votequorum_refresh_config(
1538  int32_t event,
1539  const char *key_name,
1540  struct icmap_notify_value new_val,
1541  struct icmap_notify_value old_val,
1542  void *user_data)
1543 {
1544  int old_votes, old_expected_votes;
1545  uint8_t reloading;
1546  uint8_t cancel_wfa;
1547 
1548  ENTER();
1549 
1550  /*
1551  * If a full reload is in progress then don't do anything until it's done and
1552  * can reconfigure it all atomically
1553  */
1554  if (icmap_get_uint8("config.totemconfig_reload_in_progress", &reloading) == CS_OK && reloading) {
1555  return ;
1556  }
1557 
1558  icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
1559  if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
1560  cancel_wfa >= 1) {
1561  icmap_set_uint8("quorum.cancel_wait_for_all", 0);
1562  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA,
1563  us->node_id, 0);
1564  return;
1565  }
1566 
1567  old_votes = us->votes;
1568  old_expected_votes = us->expected_votes;
1569 
1570  /*
1571  * Reload the configuration
1572  */
1573  votequorum_readconfig(VOTEQUORUM_READCONFIG_RUNTIME);
1574 
1575  /*
1576  * activate new config
1577  */
1578  votequorum_exec_send_nodeinfo(us->node_id);
1579  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1580  if (us->votes != old_votes) {
1581  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES,
1582  us->node_id, us->votes);
1583  }
1584  if (us->expected_votes != old_expected_votes) {
1585  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES,
1586  us->node_id, us->expected_votes);
1587  }
1588 
1589  LEAVE();
1590 }
1591 
1592 static void votequorum_exec_add_config_notification(void)
1593 {
1594  icmap_track_t icmap_track_nodelist = NULL;
1595  icmap_track_t icmap_track_quorum = NULL;
1596  icmap_track_t icmap_track_reload = NULL;
1597 
1598  ENTER();
1599 
1600  icmap_track_add("nodelist.",
1602  votequorum_refresh_config,
1603  NULL,
1604  &icmap_track_nodelist);
1605 
1606  icmap_track_add("quorum.",
1608  votequorum_refresh_config,
1609  NULL,
1610  &icmap_track_quorum);
1611 
1612  icmap_track_add("config.totemconfig_reload_in_progress",
1614  votequorum_refresh_config,
1615  NULL,
1616  &icmap_track_reload);
1617 
1618  LEAVE();
1619 }
1620 
1621 /*
1622  * votequorum_exec core
1623  */
1624 
1625 static int votequorum_exec_send_reconfigure(uint8_t param, unsigned int nodeid, uint32_t value)
1626 {
1627  struct req_exec_quorum_reconfigure req_exec_quorum_reconfigure;
1628  struct iovec iov[1];
1629  int ret;
1630 
1631  ENTER();
1632 
1633  req_exec_quorum_reconfigure.nodeid = nodeid;
1634  req_exec_quorum_reconfigure.value = value;
1635  req_exec_quorum_reconfigure.param = param;
1636  req_exec_quorum_reconfigure._pad0 = 0;
1637  req_exec_quorum_reconfigure._pad1 = 0;
1638  req_exec_quorum_reconfigure._pad2 = 0;
1639 
1640  req_exec_quorum_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE);
1641  req_exec_quorum_reconfigure.header.size = sizeof(req_exec_quorum_reconfigure);
1642 
1643  iov[0].iov_base = (void *)&req_exec_quorum_reconfigure;
1644  iov[0].iov_len = sizeof(req_exec_quorum_reconfigure);
1645 
1646  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1647 
1648  LEAVE();
1649  return ret;
1650 }
1651 
1652 static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1653 {
1654  struct req_exec_quorum_nodeinfo req_exec_quorum_nodeinfo;
1655  struct iovec iov[1];
1656  struct cluster_node *node;
1657  int ret;
1658 
1659  ENTER();
1660 
1661  node = find_node_by_nodeid(nodeid);
1662  if (!node) {
1663  return -1;
1664  }
1665 
1666  req_exec_quorum_nodeinfo.nodeid = nodeid;
1667  req_exec_quorum_nodeinfo.votes = node->votes;
1668  req_exec_quorum_nodeinfo.expected_votes = node->expected_votes;
1669  req_exec_quorum_nodeinfo.flags = node->flags;
1670  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
1671  decode_flags(node->flags);
1672  }
1673 
1674  req_exec_quorum_nodeinfo.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO);
1675  req_exec_quorum_nodeinfo.header.size = sizeof(req_exec_quorum_nodeinfo);
1676 
1677  iov[0].iov_base = (void *)&req_exec_quorum_nodeinfo;
1678  iov[0].iov_len = sizeof(req_exec_quorum_nodeinfo);
1679 
1680  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1681 
1682  LEAVE();
1683  return ret;
1684 }
1685 
1686 static int votequorum_exec_send_qdevice_reconfigure(const char *oldname, const char *newname)
1687 {
1688  struct req_exec_quorum_qdevice_reconfigure req_exec_quorum_qdevice_reconfigure;
1689  struct iovec iov[1];
1690  int ret;
1691 
1692  ENTER();
1693 
1694  req_exec_quorum_qdevice_reconfigure.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE);
1695  req_exec_quorum_qdevice_reconfigure.header.size = sizeof(req_exec_quorum_qdevice_reconfigure);
1696  strcpy(req_exec_quorum_qdevice_reconfigure.oldname, oldname);
1697  strcpy(req_exec_quorum_qdevice_reconfigure.newname, newname);
1698 
1699  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reconfigure;
1700  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reconfigure);
1701 
1702  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1703 
1704  LEAVE();
1705  return ret;
1706 }
1707 
1708 static int votequorum_exec_send_qdevice_reg(uint32_t operation, const char *qdevice_name_req)
1709 {
1710  struct req_exec_quorum_qdevice_reg req_exec_quorum_qdevice_reg;
1711  struct iovec iov[1];
1712  int ret;
1713 
1714  ENTER();
1715 
1716  req_exec_quorum_qdevice_reg.header.id = SERVICE_ID_MAKE(VOTEQUORUM_SERVICE, MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG);
1717  req_exec_quorum_qdevice_reg.header.size = sizeof(req_exec_quorum_qdevice_reg);
1718  req_exec_quorum_qdevice_reg.operation = operation;
1719  strcpy(req_exec_quorum_qdevice_reg.qdevice_name, qdevice_name_req);
1720 
1721  iov[0].iov_base = (void *)&req_exec_quorum_qdevice_reg;
1722  iov[0].iov_len = sizeof(req_exec_quorum_qdevice_reg);
1723 
1724  ret = corosync_api->totem_mcast (iov, 1, TOTEM_AGREED);
1725 
1726  LEAVE();
1727  return ret;
1728 }
1729 
1730 static int votequorum_exec_send_quorum_notification(void *conn, uint64_t context)
1731 {
1733  struct list_head *tmp;
1734  struct cluster_node *node;
1735  int cluster_members = 0;
1736  int i = 0;
1737  int size;
1738  char buf[sizeof(struct res_lib_votequorum_notification) + sizeof(struct votequorum_node) * (PROCESSOR_COUNT_MAX + 2)];
1739 
1740  ENTER();
1741 
1742  list_iterate(tmp, &cluster_members_list) {
1743  node = list_entry(tmp, struct cluster_node, list);
1744  cluster_members++;
1745  }
1747  cluster_members++;
1748  }
1749 
1750  size = sizeof(struct res_lib_votequorum_notification) + sizeof(struct votequorum_node) * cluster_members;
1751 
1752  res_lib_votequorum_notification = (struct res_lib_votequorum_notification *)&buf;
1753  res_lib_votequorum_notification->quorate = cluster_is_quorate;
1754  res_lib_votequorum_notification->node_list_entries = cluster_members;
1755  res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.rep.nodeid;
1756  res_lib_votequorum_notification->ring_id.seq = quorum_ringid.seq;
1757  res_lib_votequorum_notification->context = context;
1758  list_iterate(tmp, &cluster_members_list) {
1759  node = list_entry(tmp, struct cluster_node, list);
1760  res_lib_votequorum_notification->node_list[i].nodeid = node->node_id;
1761  res_lib_votequorum_notification->node_list[i++].state = node->state;
1762  }
1764  res_lib_votequorum_notification->node_list[i].nodeid = VOTEQUORUM_QDEVICE_NODEID;
1765  res_lib_votequorum_notification->node_list[i++].state = qdevice->state;
1766  }
1767  res_lib_votequorum_notification->header.id = MESSAGE_RES_VOTEQUORUM_NOTIFICATION;
1768  res_lib_votequorum_notification->header.size = size;
1769  res_lib_votequorum_notification->header.error = CS_OK;
1770 
1771  /* Send it to all interested parties */
1772  if (conn) {
1773  int ret = corosync_api->ipc_dispatch_send(conn, &buf, size);
1774  LEAVE();
1775  return ret;
1776  } else {
1777  struct quorum_pd *qpd;
1778 
1779  list_iterate(tmp, &trackers_list) {
1780  qpd = list_entry(tmp, struct quorum_pd, list);
1781  res_lib_votequorum_notification->context = qpd->tracking_context;
1782  corosync_api->ipc_dispatch_send(qpd->conn, &buf, size);
1783  }
1784  }
1785 
1786  LEAVE();
1787 
1788  return 0;
1789 }
1790 
1791 static void votequorum_exec_send_expectedvotes_notification(void)
1792 {
1793  struct res_lib_votequorum_expectedvotes_notification res_lib_votequorum_expectedvotes_notification;
1794  struct quorum_pd *qpd;
1795  struct list_head *tmp;
1796 
1797  ENTER();
1798 
1799  log_printf(LOGSYS_LEVEL_DEBUG, "Sending expected votes callback");
1800 
1801  res_lib_votequorum_expectedvotes_notification.header.id = MESSAGE_RES_VOTEQUORUM_EXPECTEDVOTES_NOTIFICATION;
1802  res_lib_votequorum_expectedvotes_notification.header.size = sizeof(res_lib_votequorum_expectedvotes_notification);
1803  res_lib_votequorum_expectedvotes_notification.header.error = CS_OK;
1804  res_lib_votequorum_expectedvotes_notification.expected_votes = us->expected_votes;
1805 
1806  list_iterate(tmp, &trackers_list) {
1807  qpd = list_entry(tmp, struct quorum_pd, list);
1808  res_lib_votequorum_expectedvotes_notification.context = qpd->tracking_context;
1809  corosync_api->ipc_dispatch_send(qpd->conn, &res_lib_votequorum_expectedvotes_notification,
1810  sizeof(struct res_lib_votequorum_expectedvotes_notification));
1811  }
1812 
1813  LEAVE();
1814 }
1815 
/*
 * Endian conversion hook for the qdevice reconfigure message.
 * No field of the message is converted here; the function only emits the
 * ENTER/LEAVE trace.
 */
static void exec_votequorum_qdevice_reconfigure_endian_convert (void *message)
{
	ENTER();
	LEAVE();
}
1822 
1823 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1824  const void *message,
1825  unsigned int nodeid)
1826 {
1828 
1829  ENTER();
1830 
1831  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice name change req from node %u [from: %s to: %s]",
1832  nodeid,
1833  req_exec_quorum_qdevice_reconfigure->oldname,
1834  req_exec_quorum_qdevice_reconfigure->newname);
1835 
1836  if (!strcmp(req_exec_quorum_qdevice_reconfigure->oldname, qdevice_name)) {
1837  log_printf(LOGSYS_LEVEL_DEBUG, "Allowing qdevice rename");
1838  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
1839  strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->newname);
1840  /*
1841  * TODO: notify qdevices about name change?
1842  * this is not relevant for now and can wait later on since
1843  * qdevices are local only and libvotequorum is not final
1844  */
1845  }
1846 
1847  LEAVE();
1848 }
1849 
1850 static void exec_votequorum_qdevice_reg_endian_convert (void *message)
1851 {
1853 
1854  ENTER();
1855 
1856  req_exec_quorum_qdevice_reg->operation = swab32(req_exec_quorum_qdevice_reg->operation);
1857 
1858  LEAVE();
1859 }
1860 
1861 static void message_handler_req_exec_votequorum_qdevice_reg (
1862  const void *message,
1863  unsigned int nodeid)
1864 {
1866  struct res_lib_votequorum_status res_lib_votequorum_status;
1867  int wipe_qdevice_name = 1;
1868  struct cluster_node *node = NULL;
1869  struct list_head *tmp;
1870  cs_error_t error = CS_OK;
1871 
1872  ENTER();
1873 
1874  log_printf(LOGSYS_LEVEL_DEBUG, "Received qdevice op %u req from node %u [%s]",
1875  req_exec_quorum_qdevice_reg->operation,
1876  nodeid, req_exec_quorum_qdevice_reg->qdevice_name);
1877 
1878  switch(req_exec_quorum_qdevice_reg->operation)
1879  {
1881  if (nodeid != us->node_id) {
1882  if (!strlen(qdevice_name)) {
1883  log_printf(LOGSYS_LEVEL_DEBUG, "Remote qdevice name recorded");
1884  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1885  }
1886  LEAVE();
1887  return;
1888  }
1889 
1890  /*
1891  * protect against the case where we broadcast qdevice registration
1892  * to new memebers, we receive the message back, but there is no registration
1893  * connection in progress
1894  */
1896  LEAVE();
1897  return;
1898  }
1899 
1900  /*
1901  * this should NEVER happen
1902  */
1903  if (!qdevice_reg_conn) {
1904  log_printf(LOGSYS_LEVEL_WARNING, "Unable to determine origin of the qdevice register call!");
1905  LEAVE();
1906  return;
1907  }
1908 
1909  /*
1910  * registering our own device in this case
1911  */
1912  if (!strlen(qdevice_name)) {
1913  strcpy(qdevice_name, req_exec_quorum_qdevice_reg->qdevice_name);
1914  }
1915 
1916  /*
1917  * check if it is our device or something else
1918  */
1919  if ((!strncmp(req_exec_quorum_qdevice_reg->qdevice_name,
1920  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
1922  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
1923  votequorum_exec_send_nodeinfo(us->node_id);
1924  } else {
1926  "A new qdevice with different name (new: %s old: %s) is trying to register!",
1927  req_exec_quorum_qdevice_reg->qdevice_name, qdevice_name);
1928  error = CS_ERR_EXIST;
1929  }
1930 
1931  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
1932  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
1933  res_lib_votequorum_status.header.error = error;
1934  corosync_api->ipc_response_send(qdevice_reg_conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
1935  qdevice_reg_conn = NULL;
1936  break;
1938  list_iterate(tmp, &cluster_members_list) {
1939  node = list_entry(tmp, struct cluster_node, list);
1940  if ((node->state == NODESTATE_MEMBER) &&
1942  wipe_qdevice_name = 0;
1943  }
1944  }
1945 
1946  if (wipe_qdevice_name) {
1947  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
1948  }
1949 
1950  break;
1951  }
1952  LEAVE();
1953 }
1954 
1955 static void exec_votequorum_nodeinfo_endian_convert (void *message)
1956 {
1957  struct req_exec_quorum_nodeinfo *nodeinfo = message;
1958 
1959  ENTER();
1960 
1961  nodeinfo->nodeid = swab32(nodeinfo->nodeid);
1962  nodeinfo->votes = swab32(nodeinfo->votes);
1963  nodeinfo->expected_votes = swab32(nodeinfo->expected_votes);
1964  nodeinfo->flags = swab32(nodeinfo->flags);
1965 
1966  LEAVE();
1967 }
1968 
1969 static void message_handler_req_exec_votequorum_nodeinfo (
1970  const void *message,
1971  unsigned int sender_nodeid)
1972 {
1973  const struct req_exec_quorum_nodeinfo *req_exec_quorum_nodeinfo = message;
1974  struct cluster_node *node = NULL;
1975  int old_votes;
1976  int old_expected;
1977  uint32_t old_flags;
1978  nodestate_t old_state;
1979  int new_node = 0;
1980  int allow_downgrade = 0;
1981  int by_node = 0;
1982  unsigned int nodeid = req_exec_quorum_nodeinfo->nodeid;
1983 
1984  ENTER();
1985 
1986  log_printf(LOGSYS_LEVEL_DEBUG, "got nodeinfo message from cluster node %u", sender_nodeid);
1987  log_printf(LOGSYS_LEVEL_DEBUG, "nodeinfo message[%u]: votes: %d, expected: %d flags: %d",
1988  nodeid,
1989  req_exec_quorum_nodeinfo->votes,
1990  req_exec_quorum_nodeinfo->expected_votes,
1991  req_exec_quorum_nodeinfo->flags);
1992 
1993  if (nodeid != VOTEQUORUM_QDEVICE_NODEID) {
1994  decode_flags(req_exec_quorum_nodeinfo->flags);
1995  }
1996 
1997  node = find_node_by_nodeid(nodeid);
1998  if (!node) {
1999  node = allocate_node(nodeid);
2000  new_node = 1;
2001  }
2002  if (!node) {
2003  corosync_api->error_memory_failure();
2004  LEAVE();
2005  return;
2006  }
2007 
2008  if (new_node) {
2009  old_votes = 0;
2010  old_expected = 0;
2011  old_state = NODESTATE_DEAD;
2012  old_flags = 0;
2013  } else {
2014  old_votes = node->votes;
2015  old_expected = node->expected_votes;
2016  old_state = node->state;
2017  old_flags = node->flags;
2018  }
2019 
2020  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2021  struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2022 
2023  assert(sender_node != NULL);
2024 
2025  if ((!cluster_is_quorate) &&
2026  (sender_node->flags & NODE_FLAGS_QUORATE)) {
2027  node->votes = req_exec_quorum_nodeinfo->votes;
2028  } else {
2029  node->votes = max(node->votes, req_exec_quorum_nodeinfo->votes);
2030  }
2031  goto recalculate;
2032  }
2033 
2034  /* Update node state */
2035  node->flags = req_exec_quorum_nodeinfo->flags;
2036  node->votes = req_exec_quorum_nodeinfo->votes;
2037  node->state = NODESTATE_MEMBER;
2038 
2039  if (node->flags & NODE_FLAGS_LEAVING) {
2040  node->state = NODESTATE_LEAVING;
2041  allow_downgrade = 1;
2042  by_node = 1;
2043  }
2044 
2045  if ((!cluster_is_quorate) &&
2046  (node->flags & NODE_FLAGS_QUORATE)) {
2047  allow_downgrade = 1;
2048  us->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2049  }
2050 
2051  if (node->flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2052  node->expected_votes = req_exec_quorum_nodeinfo->expected_votes;
2053  } else {
2054  node->expected_votes = us->expected_votes;
2055  }
2056 
2057  if ((last_man_standing) && (node->votes > 1)) {
2058  log_printf(LOGSYS_LEVEL_WARNING, "Last Man Standing feature is supported only when all"
2059  "cluster nodes votes are set to 1. Disabling LMS.");
2060  last_man_standing = 0;
2061  if (last_man_standing_timer_set) {
2062  corosync_api->timer_delete(last_man_standing_timer);
2063  last_man_standing_timer_set = 0;
2064  }
2065  }
2066 
2067 recalculate:
2068  if ((new_node) ||
2069  (nodeid == us->node_id) ||
2070  (node->flags & NODE_FLAGS_FIRST) ||
2071  (old_votes != node->votes) ||
2072  (old_expected != node->expected_votes) ||
2073  (old_flags != node->flags) ||
2074  (old_state != node->state)) {
2075  recalculate_quorum(allow_downgrade, by_node);
2076  }
2077 
2078  if ((wait_for_all) &&
2079  (!(node->flags & NODE_FLAGS_WFASTATUS)) &&
2080  (node->flags & NODE_FLAGS_QUORATE)) {
2081  update_wait_for_all_status(0);
2082  }
2083 
2084  LEAVE();
2085 }
2086 
2087 static void exec_votequorum_reconfigure_endian_convert (void *message)
2088 {
2089  struct req_exec_quorum_reconfigure *reconfigure = message;
2090 
2091  ENTER();
2092 
2093  reconfigure->nodeid = swab32(reconfigure->nodeid);
2094  reconfigure->value = swab32(reconfigure->value);
2095 
2096  LEAVE();
2097 }
2098 
2099 static void message_handler_req_exec_votequorum_reconfigure (
2100  const void *message,
2101  unsigned int nodeid)
2102 {
2104  struct cluster_node *node;
2105  struct list_head *nodelist;
2106 
2107  ENTER();
2108 
2109  log_printf(LOGSYS_LEVEL_DEBUG, "got reconfigure message from cluster node %u for %u",
2110  nodeid, req_exec_quorum_reconfigure->nodeid);
2111 
2112  switch(req_exec_quorum_reconfigure->param)
2113  {
2115  list_iterate(nodelist, &cluster_members_list) {
2116  node = list_entry(nodelist, struct cluster_node, list);
2117  if (node->state == NODESTATE_MEMBER) {
2118  node->expected_votes = req_exec_quorum_reconfigure->value;
2119  }
2120  }
2121  votequorum_exec_send_expectedvotes_notification();
2122  update_ev_barrier(req_exec_quorum_reconfigure->value);
2123  if (ev_tracking) {
2124  us->expected_votes = max(us->expected_votes, ev_tracking_barrier);
2125  }
2126  recalculate_quorum(1, 0); /* Allow decrease */
2127  break;
2128 
2130  node = find_node_by_nodeid(req_exec_quorum_reconfigure->nodeid);
2131  if (!node) {
2132  LEAVE();
2133  return;
2134  }
2135  node->votes = req_exec_quorum_reconfigure->value;
2136  recalculate_quorum(1, 0); /* Allow decrease */
2137  break;
2138 
2140  update_wait_for_all_status(0);
2141  log_printf(LOGSYS_LEVEL_INFO, "wait_for_all_status reset by user on node %d.",
2142  req_exec_quorum_reconfigure->nodeid);
2143  recalculate_quorum(0, 0);
2144 
2145  break;
2146 
2147  }
2148 
2149  LEAVE();
2150 }
2151 
2152 static int votequorum_exec_exit_fn (void)
2153 {
2154  int ret = 0;
2155 
2156  ENTER();
2157 
2158  /*
2159  * tell the other nodes we are leaving
2160  */
2161 
2162  if (allow_downscale) {
2163  us->flags |= NODE_FLAGS_LEAVING;
2164  ret = votequorum_exec_send_nodeinfo(us->node_id);
2165  }
2166 
2167  if ((ev_tracking) && (ev_tracking_fd != -1)) {
2168  close(ev_tracking_fd);
2169  }
2170 
2171 
2172  LEAVE();
2173  return ret;
2174 }
2175 
2176 static void votequorum_set_icmap_ro_keys(void)
2177 {
2178  icmap_set_ro_access("quorum.allow_downscale", CS_FALSE, CS_TRUE);
2179  icmap_set_ro_access("quorum.wait_for_all", CS_FALSE, CS_TRUE);
2180  icmap_set_ro_access("quorum.last_man_standing", CS_FALSE, CS_TRUE);
2181  icmap_set_ro_access("quorum.last_man_standing_window", CS_FALSE, CS_TRUE);
2182  icmap_set_ro_access("quorum.expected_votes_tracking", CS_FALSE, CS_TRUE);
2183  icmap_set_ro_access("quorum.auto_tie_breaker", CS_FALSE, CS_TRUE);
2184  icmap_set_ro_access("quorum.auto_tie_breaker_node", CS_FALSE, CS_TRUE);
2185 }
2186 
2187 static char *votequorum_exec_init_fn (struct corosync_api_v1 *api)
2188 {
2189  char *error = NULL;
2190 
2191  ENTER();
2192 
2193  /*
2194  * make sure we start clean
2195  */
2196  list_init(&cluster_members_list);
2197  list_init(&trackers_list);
2198  qdevice = NULL;
2199  us = NULL;
2200  memset(cluster_nodes, 0, sizeof(cluster_nodes));
2201 
2202  /*
2203  * Allocate a cluster_node for qdevice
2204  */
2205  qdevice = allocate_node(VOTEQUORUM_QDEVICE_NODEID);
2206  if (!qdevice) {
2207  LEAVE();
2208  return ((char *)"Could not allocate node.");
2209  }
2210  qdevice->votes = 0;
2211  memset(qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2212 
2213  /*
2214  * Allocate a cluster_node for us
2215  */
2216  us = allocate_node(corosync_api->totem_nodeid_get());
2217  if (!us) {
2218  LEAVE();
2219  return ((char *)"Could not allocate node.");
2220  }
2221 
2222  icmap_set_uint32("runtime.votequorum.this_node_id", us->node_id);
2223 
2224  us->state = NODESTATE_MEMBER;
2225  us->votes = 1;
2226  us->flags |= NODE_FLAGS_FIRST;
2227 
2228  error = votequorum_readconfig(VOTEQUORUM_READCONFIG_STARTUP);
2229  if (error) {
2230  return error;
2231  }
2232  recalculate_quorum(0, 0);
2233 
2234  /*
2235  * Set RO keys in icmap
2236  */
2237  votequorum_set_icmap_ro_keys();
2238 
2239  /*
2240  * Listen for changes
2241  */
2242  votequorum_exec_add_config_notification();
2243 
2244  /*
2245  * Start us off with one node
2246  */
2247  votequorum_exec_send_nodeinfo(us->node_id);
2248 
2249  LEAVE();
2250 
2251  return (NULL);
2252 }
2253 
2254 /*
2255  * votequorum service core
2256  */
2257 
2258 static void votequorum_last_man_standing_timer_fn(void *arg)
2259 {
2260  ENTER();
2261 
2262  last_man_standing_timer_set = 0;
2263  if (cluster_is_quorate) {
2264  recalculate_quorum(1,1);
2265  }
2266 
2267  LEAVE();
2268 }
2269 
2270 static void votequorum_sync_init (
2271  const unsigned int *trans_list, size_t trans_list_entries,
2272  const unsigned int *member_list, size_t member_list_entries,
2273  const struct memb_ring_id *ring_id)
2274 {
2275  int i, j;
2276  int found;
2277  int left_nodes;
2278  struct cluster_node *node;
2279 
2280  ENTER();
2281 
2282  sync_in_progress = 1;
2283  sync_nodeinfo_sent = 0;
2284  sync_wait_for_poll_or_timeout = 0;
2285 
2286  if (member_list_entries > 1) {
2287  us->flags &= ~NODE_FLAGS_FIRST;
2288  }
2289 
2290  /*
2291  * we don't need to track which nodes have left directly,
2292  * since that info is in the node db, but we need to know
2293  * if somebody has left for last_man_standing
2294  */
2295  left_nodes = 0;
2296  for (i = 0; i < quorum_members_entries; i++) {
2297  found = 0;
2298  for (j = 0; j < member_list_entries; j++) {
2299  if (quorum_members[i] == member_list[j]) {
2300  found = 1;
2301  break;
2302  }
2303  }
2304  if (found == 0) {
2305  left_nodes = 1;
2306  node = find_node_by_nodeid(quorum_members[i]);
2307  if (node) {
2308  node->state = NODESTATE_DEAD;
2309  }
2310  }
2311  }
2312 
2313  if (last_man_standing) {
2314  if (((member_list_entries >= quorum) && (left_nodes)) ||
2315  ((member_list_entries <= quorum) && (auto_tie_breaker != ATB_NONE) && (check_low_node_id_partition() == 1))) {
2316  if (last_man_standing_timer_set) {
2317  corosync_api->timer_delete(last_man_standing_timer);
2318  last_man_standing_timer_set = 0;
2319  }
2320  corosync_api->timer_add_duration((unsigned long long)last_man_standing_window*1000000,
2321  NULL, votequorum_last_man_standing_timer_fn,
2322  &last_man_standing_timer);
2323  last_man_standing_timer_set = 1;
2324  }
2325  }
2326 
2327  memcpy(previous_quorum_members, quorum_members, sizeof(unsigned int) * quorum_members_entries);
2328  previous_quorum_members_entries = quorum_members_entries;
2329 
2330  memcpy(quorum_members, member_list, sizeof(unsigned int) * member_list_entries);
2331  quorum_members_entries = member_list_entries;
2332  memcpy(&quorum_ringid, ring_id, sizeof(*ring_id));
2333 
2335  /*
2336  * Reset poll timer. Sync waiting is interrupted on valid qdevice poll or after timeout
2337  */
2338  if (qdevice_timer_set) {
2339  corosync_api->timer_delete(qdevice_timer);
2340  }
2341  corosync_api->timer_add_duration((unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2342  qdevice_timer_fn, &qdevice_timer);
2343  qdevice_timer_set = 1;
2344  sync_wait_for_poll_or_timeout = 1;
2345 
2346  log_printf(LOGSYS_LEVEL_INFO, "waiting for quorum device %s poll (but maximum for %u ms)",
2347  qdevice_name, qdevice_sync_timeout);
2348  }
2349 
2350  LEAVE();
2351 }
2352 
2353 static int votequorum_sync_process (void)
2354 {
2355 
2356  if (!sync_nodeinfo_sent) {
2357  votequorum_exec_send_nodeinfo(us->node_id);
2358  votequorum_exec_send_nodeinfo(VOTEQUORUM_QDEVICE_NODEID);
2359  if (strlen(qdevice_name)) {
2360  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2361  qdevice_name);
2362  }
2363  sync_nodeinfo_sent = 1;
2364  }
2365 
2366  if (us->flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2367  /*
2368  * Waiting for qdevice to poll with new ringid or timeout
2369  */
2370 
2371  return (-1);
2372  }
2373 
2374  return 0;
2375 }
2376 
2377 static void votequorum_sync_activate (void)
2378 {
2379  recalculate_quorum(0, 0);
2380  quorum_callback(quorum_members, quorum_members_entries,
2381  cluster_is_quorate, &quorum_ringid);
2382  votequorum_exec_send_quorum_notification(NULL, 0L);
2383 
2384  sync_in_progress = 0;
2385 }
2386 
/*
 * Sync "abort" callback.
 */
static void votequorum_sync_abort (void)
{
	/* intentionally empty: no work is done here */
}
2391 
2393  quorum_set_quorate_fn_t q_set_quorate_fn)
2394 {
2395  char *error;
2396 
2397  ENTER();
2398 
2399  if (q_set_quorate_fn == NULL) {
2400  return ((char *)"Quorate function not set");
2401  }
2402 
2403  corosync_api = api;
2404  quorum_callback = q_set_quorate_fn;
2405 
2406  error = corosync_service_link_and_init(corosync_api,
2407  &votequorum_service[0]);
2408  if (error) {
2409  return (error);
2410  }
2411 
2412  LEAVE();
2413 
2414  return (NULL);
2415 }
2416 
2417 /*
2418  * Library Handler init/fini
2419  */
2420 
2421 static int quorum_lib_init_fn (void *conn)
2422 {
2423  struct quorum_pd *pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2424 
2425  ENTER();
2426 
2427  list_init (&pd->list);
2428  pd->conn = conn;
2429 
2430  LEAVE();
2431  return (0);
2432 }
2433 
2434 static int quorum_lib_exit_fn (void *conn)
2435 {
2436  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2437 
2438  ENTER();
2439 
2440  if (quorum_pd->tracking_enabled) {
2441  list_del (&quorum_pd->list);
2442  list_init (&quorum_pd->list);
2443  }
2444 
2445  LEAVE();
2446 
2447  return (0);
2448 }
2449 
2450 /*
2451  * library internal functions
2452  */
2453 
2454 static void qdevice_timer_fn(void *arg)
2455 {
2456  ENTER();
2457 
2458  if ((!(us->flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2459  (!qdevice_timer_set)) {
2460  LEAVE();
2461  return;
2462  }
2463 
2466  log_printf(LOGSYS_LEVEL_INFO, "lost contact with quorum device %s", qdevice_name);
2467  votequorum_exec_send_nodeinfo(us->node_id);
2468 
2469  qdevice_timer_set = 0;
2470  sync_wait_for_poll_or_timeout = 0;
2471 
2472  LEAVE();
2473 }
2474 
2475 /*
2476  * Library Handler Functions
2477  */
2478 
2479 static void message_handler_req_lib_votequorum_getinfo (void *conn, const void *message)
2480 {
2482  struct res_lib_votequorum_getinfo res_lib_votequorum_getinfo;
2483  struct cluster_node *node;
2484  unsigned int highest_expected = 0;
2485  unsigned int total_votes = 0;
2486  cs_error_t error = CS_OK;
2487  uint32_t nodeid = req_lib_votequorum_getinfo->nodeid;
2488 
2489  ENTER();
2490 
2491  log_printf(LOGSYS_LEVEL_DEBUG, "got getinfo request on %p for node %u", conn, req_lib_votequorum_getinfo->nodeid);
2492 
2493  if (nodeid == VOTEQUORUM_QDEVICE_NODEID) {
2494  nodeid = us->node_id;
2495  }
2496 
2497  node = find_node_by_nodeid(nodeid);
2498  if (node) {
2499  struct cluster_node *iternode;
2500  struct list_head *nodelist;
2501 
2502  list_iterate(nodelist, &cluster_members_list) {
2503  iternode = list_entry(nodelist, struct cluster_node, list);
2504 
2505  if (iternode->state == NODESTATE_MEMBER) {
2506  highest_expected =
2507  max(highest_expected, iternode->expected_votes);
2508  total_votes += iternode->votes;
2509  }
2510  }
2511 
2512  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2513  total_votes += qdevice->votes;
2514  }
2515 
2516  switch(node->state) {
2517  case NODESTATE_MEMBER:
2518  res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_MEMBER;
2519  break;
2520  case NODESTATE_DEAD:
2521  res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_DEAD;
2522  break;
2523  case NODESTATE_LEAVING:
2524  res_lib_votequorum_getinfo.state = VOTEQUORUM_NODESTATE_LEAVING;
2525  break;
2526  default:
2527  res_lib_votequorum_getinfo.state = node->state;
2528  break;
2529  }
2530  res_lib_votequorum_getinfo.state = node->state;
2531  res_lib_votequorum_getinfo.votes = node->votes;
2532  res_lib_votequorum_getinfo.expected_votes = node->expected_votes;
2533  res_lib_votequorum_getinfo.highest_expected = highest_expected;
2534 
2535  res_lib_votequorum_getinfo.quorum = quorum;
2536  res_lib_votequorum_getinfo.total_votes = total_votes;
2537  res_lib_votequorum_getinfo.flags = 0;
2538  res_lib_votequorum_getinfo.nodeid = node->node_id;
2539 
2540  if (two_node) {
2541  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_TWONODE;
2542  }
2543  if (cluster_is_quorate) {
2544  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QUORATE;
2545  }
2546  if (wait_for_all) {
2547  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_WAIT_FOR_ALL;
2548  }
2549  if (last_man_standing) {
2550  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_LAST_MAN_STANDING;
2551  }
2552  if (auto_tie_breaker != ATB_NONE) {
2553  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_AUTO_TIE_BREAKER;
2554  }
2555  if (allow_downscale) {
2556  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_ALLOW_DOWNSCALE;
2557  }
2558 
2559  memset(res_lib_votequorum_getinfo.qdevice_name, 0, VOTEQUORUM_QDEVICE_MAX_NAME_LEN);
2560  strcpy(res_lib_votequorum_getinfo.qdevice_name, qdevice_name);
2561  res_lib_votequorum_getinfo.qdevice_votes = qdevice->votes;
2562 
2563  if (node->flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2564  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_REGISTERED;
2565  }
2566  if (node->flags & NODE_FLAGS_QDEVICE_ALIVE) {
2567  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_ALIVE;
2568  }
2569  if (node->flags & NODE_FLAGS_QDEVICE_CAST_VOTE) {
2570  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_CAST_VOTE;
2571  }
2572  if (node->flags & NODE_FLAGS_QDEVICE_MASTER_WINS) {
2573  res_lib_votequorum_getinfo.flags |= VOTEQUORUM_INFO_QDEVICE_MASTER_WINS;
2574  }
2575  } else {
2576  error = CS_ERR_NOT_EXIST;
2577  }
2578 
2579  res_lib_votequorum_getinfo.header.size = sizeof(res_lib_votequorum_getinfo);
2580  res_lib_votequorum_getinfo.header.id = MESSAGE_RES_VOTEQUORUM_GETINFO;
2581  res_lib_votequorum_getinfo.header.error = error;
2582  corosync_api->ipc_response_send(conn, &res_lib_votequorum_getinfo, sizeof(res_lib_votequorum_getinfo));
2583  log_printf(LOGSYS_LEVEL_DEBUG, "getinfo response error: %d", error);
2584 
2585  LEAVE();
2586 }
2587 
2588 static void message_handler_req_lib_votequorum_setexpected (void *conn, const void *message)
2589 {
2591  struct res_lib_votequorum_status res_lib_votequorum_status;
2592  cs_error_t error = CS_OK;
2593  unsigned int newquorum;
2594  unsigned int total_votes;
2595  uint8_t allow_downscale_status = 0;
2596 
2597  ENTER();
2598 
2599  allow_downscale_status = allow_downscale;
2600  allow_downscale = 0;
2601 
2602  /*
2603  * Validate new expected votes
2604  */
2605  newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
2606  allow_downscale = allow_downscale_status;
2607  if (newquorum < total_votes / 2 ||
2608  newquorum > total_votes) {
2609  error = CS_ERR_INVALID_PARAM;
2610  goto error_exit;
2611  }
2612 
2613  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES, us->node_id,
2614  req_lib_votequorum_setexpected->expected_votes);
2615 
2616 error_exit:
2617  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2618  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2619  res_lib_votequorum_status.header.error = error;
2620  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2621 
2622  LEAVE();
2623 }
2624 
2625 static void message_handler_req_lib_votequorum_setvotes (void *conn, const void *message)
2626 {
2628  struct res_lib_votequorum_status res_lib_votequorum_status;
2629  struct cluster_node *node;
2630  unsigned int newquorum;
2631  unsigned int total_votes;
2632  unsigned int saved_votes;
2633  cs_error_t error = CS_OK;
2634  unsigned int nodeid;
2635 
2636  ENTER();
2637 
2638  nodeid = req_lib_votequorum_setvotes->nodeid;
2639  node = find_node_by_nodeid(nodeid);
2640  if (!node) {
2641  error = CS_ERR_NAME_NOT_FOUND;
2642  goto error_exit;
2643  }
2644 
2645  /*
2646  * Check votes is valid
2647  */
2648  saved_votes = node->votes;
2649  node->votes = req_lib_votequorum_setvotes->votes;
2650 
2651  newquorum = calculate_quorum(1, 0, &total_votes);
2652 
2653  if (newquorum < total_votes / 2 ||
2654  newquorum > total_votes) {
2655  node->votes = saved_votes;
2656  error = CS_ERR_INVALID_PARAM;
2657  goto error_exit;
2658  }
2659 
2660  votequorum_exec_send_reconfigure(VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES, nodeid,
2661  req_lib_votequorum_setvotes->votes);
2662 
2663 error_exit:
2664  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2665  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2666  res_lib_votequorum_status.header.error = error;
2667  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2668 
2669  LEAVE();
2670 }
2671 
2672 static void message_handler_req_lib_votequorum_trackstart (void *conn,
2673  const void *message)
2674 {
2676  struct res_lib_votequorum_status res_lib_votequorum_status;
2677  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2678  cs_error_t error = CS_OK;
2679 
2680  ENTER();
2681 
2682  /*
2683  * If an immediate listing of the current cluster membership
2684  * is requested, generate membership list
2685  */
2686  if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CURRENT ||
2687  req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES) {
2688  log_printf(LOGSYS_LEVEL_DEBUG, "sending initial status to %p", conn);
2689  votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->context);
2690  }
2691 
2692  if (quorum_pd->tracking_enabled) {
2693  error = CS_ERR_EXIST;
2694  goto response_send;
2695  }
2696 
2697  /*
2698  * Record requests for tracking
2699  */
2700  if (req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES ||
2701  req_lib_votequorum_trackstart->track_flags & CS_TRACK_CHANGES_ONLY) {
2702 
2703  quorum_pd->track_flags = req_lib_votequorum_trackstart->track_flags;
2704  quorum_pd->tracking_enabled = 1;
2705  quorum_pd->tracking_context = req_lib_votequorum_trackstart->context;
2706 
2707  list_add (&quorum_pd->list, &trackers_list);
2708  }
2709 
2710 response_send:
2711  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2712  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2713  res_lib_votequorum_status.header.error = error;
2714  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2715 
2716  LEAVE();
2717 }
2718 
2719 static void message_handler_req_lib_votequorum_trackstop (void *conn,
2720  const void *message)
2721 {
2722  struct res_lib_votequorum_status res_lib_votequorum_status;
2723  struct quorum_pd *quorum_pd = (struct quorum_pd *)corosync_api->ipc_private_data_get (conn);
2724  int error = CS_OK;
2725 
2726  ENTER();
2727 
2728  if (quorum_pd->tracking_enabled) {
2729  error = CS_OK;
2730  quorum_pd->tracking_enabled = 0;
2731  list_del (&quorum_pd->list);
2732  list_init (&quorum_pd->list);
2733  } else {
2734  error = CS_ERR_NOT_EXIST;
2735  }
2736 
2737  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2738  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2739  res_lib_votequorum_status.header.error = error;
2740  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2741 
2742  LEAVE();
2743 }
2744 
2745 static void message_handler_req_lib_votequorum_qdevice_register (void *conn,
2746  const void *message)
2747 {
2749  struct res_lib_votequorum_status res_lib_votequorum_status;
2750  cs_error_t error = CS_OK;
2751 
2752  ENTER();
2753 
2754  if (!qdevice_can_operate) {
2755  log_printf(LOGSYS_LEVEL_INFO, "Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2756  error = CS_ERR_ACCESS;
2757  goto out;
2758  }
2759 
2761  if ((!strncmp(req_lib_votequorum_qdevice_register->name,
2762  qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN))) {
2763  goto out;
2764  } else {
2766  "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2767  req_lib_votequorum_qdevice_register->name, qdevice_name);
2768  error = CS_ERR_EXIST;
2769  goto out;
2770  }
2771  } else {
2772  if (qdevice_reg_conn != NULL) {
2774  "Registration request already in progress");
2775  error = CS_ERR_TRY_AGAIN;
2776  goto out;
2777  }
2778  qdevice_reg_conn = conn;
2779  if (votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_REGISTER,
2780  req_lib_votequorum_qdevice_register->name) != 0) {
2782  "Unable to send qdevice registration request to cluster");
2783  error = CS_ERR_TRY_AGAIN;
2784  qdevice_reg_conn = NULL;
2785  } else {
2786  LEAVE();
2787  return;
2788  }
2789  }
2790 
2791 out:
2792 
2793  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2794  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2795  res_lib_votequorum_status.header.error = error;
2796  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2797 
2798  LEAVE();
2799 }
2800 
2801 static void message_handler_req_lib_votequorum_qdevice_unregister (void *conn,
2802  const void *message)
2803 {
2805  struct res_lib_votequorum_status res_lib_votequorum_status;
2806  cs_error_t error = CS_OK;
2807 
2808  ENTER();
2809 
2811  if (strncmp(req_lib_votequorum_qdevice_unregister->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2812  error = CS_ERR_INVALID_PARAM;
2813  goto out;
2814  }
2815  if (qdevice_timer_set) {
2816  corosync_api->timer_delete(qdevice_timer);
2817  qdevice_timer_set = 0;
2818  sync_wait_for_poll_or_timeout = 0;
2819  }
2824  votequorum_exec_send_nodeinfo(us->node_id);
2825  votequorum_exec_send_qdevice_reg(VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER,
2826  req_lib_votequorum_qdevice_unregister->name);
2827  } else {
2828  error = CS_ERR_NOT_EXIST;
2829  }
2830 
2831 out:
2832  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2833  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2834  res_lib_votequorum_status.header.error = error;
2835  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2836 
2837  LEAVE();
2838 }
2839 
2840 static void message_handler_req_lib_votequorum_qdevice_update (void *conn,
2841  const void *message)
2842 {
2844  struct res_lib_votequorum_status res_lib_votequorum_status;
2845  cs_error_t error = CS_OK;
2846 
2847  ENTER();
2848 
2850  if (strncmp(req_lib_votequorum_qdevice_update->oldname, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2851  error = CS_ERR_INVALID_PARAM;
2852  goto out;
2853  }
2854  votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->oldname,
2855  req_lib_votequorum_qdevice_update->newname);
2856  } else {
2857  error = CS_ERR_NOT_EXIST;
2858  }
2859 
2860 out:
2861  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2862  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2863  res_lib_votequorum_status.header.error = error;
2864  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2865 
2866  LEAVE();
2867 }
2868 
2869 static void message_handler_req_lib_votequorum_qdevice_poll (void *conn,
2870  const void *message)
2871 {
2873  struct res_lib_votequorum_status res_lib_votequorum_status;
2874  cs_error_t error = CS_OK;
2875  uint32_t oldflags;
2876 
2877  ENTER();
2878 
2879  if (!qdevice_can_operate) {
2880  error = CS_ERR_ACCESS;
2881  goto out;
2882  }
2883 
2885  if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.rep.nodeid &&
2886  req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.seq)) {
2887  log_printf(LOGSYS_LEVEL_DEBUG, "Received poll ring id (%u.%"PRIu64") != last sync "
2888  "ring id (%u.%"PRIu64"). Ignoring poll call.",
2889  req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq,
2890  quorum_ringid.rep.nodeid, quorum_ringid.seq);
2891  error = CS_ERR_MESSAGE_ERROR;
2892  goto out;
2893  }
2894  if (strncmp(req_lib_votequorum_qdevice_poll->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2895  error = CS_ERR_INVALID_PARAM;
2896  goto out;
2897  }
2898 
2899  if (qdevice_timer_set) {
2900  corosync_api->timer_delete(qdevice_timer);
2901  qdevice_timer_set = 0;
2902  }
2903 
2904  oldflags = us->flags;
2905 
2907 
2908  if (req_lib_votequorum_qdevice_poll->cast_vote) {
2910  } else {
2912  }
2913 
2914  if (us->flags != oldflags) {
2915  votequorum_exec_send_nodeinfo(us->node_id);
2916  }
2917 
2918  corosync_api->timer_add_duration((unsigned long long)qdevice_timeout*1000000, qdevice,
2919  qdevice_timer_fn, &qdevice_timer);
2920  qdevice_timer_set = 1;
2921  sync_wait_for_poll_or_timeout = 0;
2922  } else {
2923  error = CS_ERR_NOT_EXIST;
2924  }
2925 
2926 out:
2927  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2928  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2929  res_lib_votequorum_status.header.error = error;
2930  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2931 
2932  LEAVE();
2933 }
2934 
2935 static void message_handler_req_lib_votequorum_qdevice_master_wins (void *conn,
2936  const void *message)
2937 {
2939  struct res_lib_votequorum_status res_lib_votequorum_status;
2940  cs_error_t error = CS_OK;
2941  uint32_t oldflags = us->flags;
2942 
2943  ENTER();
2944 
2945  if (!qdevice_can_operate) {
2946  error = CS_ERR_ACCESS;
2947  goto out;
2948  }
2949 
2951  if (strncmp(req_lib_votequorum_qdevice_master_wins->name, qdevice_name, VOTEQUORUM_QDEVICE_MAX_NAME_LEN)) {
2952  error = CS_ERR_INVALID_PARAM;
2953  goto out;
2954  }
2955 
2956  if (req_lib_votequorum_qdevice_master_wins->allow) {
2958  } else {
2960  }
2961 
2962  if (us->flags != oldflags) {
2963  votequorum_exec_send_nodeinfo(us->node_id);
2964  }
2965 
2966  update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->allow);
2967  } else {
2968  error = CS_ERR_NOT_EXIST;
2969  }
2970 
2971 out:
2972  res_lib_votequorum_status.header.size = sizeof(res_lib_votequorum_status);
2973  res_lib_votequorum_status.header.id = MESSAGE_RES_VOTEQUORUM_STATUS;
2974  res_lib_votequorum_status.header.error = error;
2975  corosync_api->ipc_response_send(conn, &res_lib_votequorum_status, sizeof(res_lib_votequorum_status));
2976 
2977  LEAVE();
2978 }
uint32_t expected_votes
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
void *(* ipc_private_data_get)(void *conn)
Definition: coroapi.h:256
#define VOTEQUORUM_INFO_QUORATE
#define TOTEM_AGREED
Definition: coroapi.h:102
#define CS_TRUE
Definition: corotypes.h:54
const char * name
Definition: coroapi.h:492
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_READCONFIG_STARTUP
const char * get_run_dir(void)
Definition: util.c:174
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1103
#define NODE_FLAGS_WFASTATUS
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:73
uint32_t value
#define CS_FALSE
Definition: corotypes.h:53
struct list_head * next
Definition: list.h:47
#define NODE_FLAGS_QUORATE
#define VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT
The corosync_service_engine struct.
Definition: coroapi.h:491
struct list_head list
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1124
The req_lib_votequorum_qdevice_master_wins struct.
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE
#define max(a, b)
int(* ipc_response_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:258
#define list_iterate(v, head)
char * votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn)
nodestate_t
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA
int tracking_enabled
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define CS_TRACK_CURRENT
Definition: corotypes.h:87
The req_lib_votequorum_qdevice_unregister struct.
#define NODE_FLAGS_QDEVICE_MASTER_WINS
nodestate_t state
The corosync_lib_handler struct.
Definition: coroapi.h:468
#define VOTEQUORUM_INFO_LAST_MAN_STANDING
struct message_header header
Definition: totemsrp.c:60
#define VOTEQUORUM_INFO_WAIT_FOR_ALL
#define NODE_FLAGS_QDEVICE_CAST_VOTE
uint32_t operation
The res_lib_votequorum_status struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE
The corosync_exec_handler struct.
Definition: coroapi.h:476
#define VOTEQUORUM_INFO_TWONODE
int(* totem_mcast)(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
Definition: coroapi.h:281
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
Definition: list.h:46
#define VOTEQUORUM_INFO_QDEVICE_REGISTERED
#define log_printf(level, format, args...)
Definition: logsys.h:319
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
Definition: coroapi.h:477
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The res_lib_votequorum_notification struct.
#define VOTEQUORUM_QDEVICE_NODEID
#define VOTEQUORUM_INFO_QDEVICE_MASTER_WINS
#define VOTEQUORUM_NODESTATE_MEMBER
#define CS_TRACK_CHANGES
Definition: corotypes.h:88
#define SERVICE_ID_MAKE(a, b)
Definition: coroapi.h:459
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
void(* quorum_set_quorate_fn_t)(const unsigned int *view_list, size_t view_list_entries, int quorate, struct memb_ring_id *)
Definition: exec/quorum.h:42
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER
cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8)
Definition: icmap.c:842
void(* error_memory_failure)(void) __attribute__((noreturn))
Definition: coroapi.h:423
#define VOTEQUORUM_INFO_ALLOW_DOWNSCALE
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:71
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
#define VOTEQUORUM_INFO_QDEVICE_ALIVE
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:611
void * user_data
Definition: sam.c:126
struct list_head list
unsigned int(* totem_nodeid_get)(void)
Definition: coroapi.h:275
unsigned int nodeid
Definition: coroapi.h:112
#define CS_TRACK_CHANGES_ONLY
Definition: corotypes.h:89
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
The req_lib_votequorum_getinfo struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
Linked list API.
struct totem_ip_address rep
Definition: coroapi.h:123
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
Definition: coroapi.h:157
The req_lib_votequorum_qdevice_update struct.
cs_error_t
The cs_error_t enum.
Definition: corotypes.h:94
unsigned char track_flags
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
LOGSYS_DECLARE_SUBSYS("VOTEQ")
The req_lib_votequorum_setvotes struct.
The corosync_api_v1 struct.
Definition: coroapi.h:225
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:866
uint8_t param
The req_lib_votequorum_setexpected struct.
uint32_t quorate
Definition: sam.c:133
#define swab32(x)
The swab32 macro.
Definition: swab.h:51
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER
struct corosync_service_engine * votequorum_get_service_engine_ver0(void)
The res_lib_votequorum_expectedvotes_notification struct.
#define ENTER
Definition: logsys.h:320
The req_lib_votequorum_qdevice_register struct.
char * corosync_service_link_and_init(struct corosync_api_v1 *corosync_api, struct default_service *service)
Link and initialize a service.
Definition: service.c:117
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_NODESTATE_LEAVING
#define PROCESSOR_COUNT_MAX
Definition: coroapi.h:96
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG
The memb_ring_id struct.
Definition: coroapi.h:122
#define VOTEQUORUM_READCONFIG_RUNTIME
struct list_head * prev
Definition: list.h:48
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO
The req_lib_votequorum_trackstart struct.
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES
#define VOTEQUORUM_QDEVICE_MAX_NAME_LEN
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
The req_lib_votequorum_qdevice_poll struct.
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:896
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:69
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define NODE_FLAGS_LEAVING
#define list_entry(ptr, type, member)
Definition: list.h:84
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED
Definition: coroapi.h:156
#define LOGSYS_LEVEL_NOTICE
Definition: logsys.h:72
unsigned long long seq
Definition: coroapi.h:124
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
Definition: icmap.c:587
void(* lib_handler_fn)(void *conn, const void *msg)
Definition: coroapi.h:469
The res_lib_votequorum_getinfo struct.
#define VOTEQUORUM_NODESTATE_DEAD
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for the given key (key_name), or for the prefix if prefix is set.
Definition: icmap.c:1233
#define VOTEQUORUM_INFO_QDEVICE_CAST_VOTE
int(* ipc_dispatch_send)(void *conn, const void *msg, size_t mlen)
Definition: coroapi.h:263
#define VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT
const char * name
Definition: service.h:43
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1097
struct memb_ring_id ring_id
Definition: totemsrp.c:64
uint64_t tracking_context
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES
#define DEFAULT_LMS_WIN
#define LEAVE
Definition: logsys.h:321
#define NODE_FLAGS_QDEVICE_ALIVE
qb_map_iter_t * icmap_iter_t
Iterator type.
Definition: icmap.h:123
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
#define NODE_FLAGS_QDEVICE_REGISTERED
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1167
#define NODE_FLAGS_FIRST
struct qb_ipc_request_header header __attribute__((aligned(8)))
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem.nodeid", "totem.version", ...
Definition: icmap.h:85