5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
# Paths and names shared by the functions in this file.
# Log file to which the logging code appends timestamped messages.
17 DBAGENT_LOG=/var/log/dbwatchdog.log
# CLI used for the "set", "get" and "get-domain" calls made by this file.
20 DSSCLI=/usr/local/bin/dsscli
# Attribute name under which the node trying to become master is stored;
# unset_becoming_master writes the value "none" to it.
21 BECOMEMASTERATTR=become-master
# CLI used for the "lock" and "unlock" calls.
24 LOCKCLI=/usr/local/bin/lockcli
# File from which the lock uuid is read back before unlocking.
# NOTE(review): $DOMAIN is expanded here but no assignment to DOMAIN is
# visible before this line — confirm it is set earlier or in the environment.
28 LOCKUUID_FILE=/var/run/.$DOMAIN.lock.uuid
# get_db_nodes: split the comma-separated node list given as $1 into the
# array "dbnodes" and store the element count in "dbnodes_count". Other
# functions in this file read both variables.
33 function get_db_nodes()
# IFS=',' is scoped to this single read; -a fills the array "dbnodes".
# NOTE(review): $1 is unquoted in the here-string.
35 IFS=',' read -a dbnodes <<< $1
36 dbnodes_count=${#dbnodes[@]}
# Logging fragment (NOTE(review): the enclosing function header and the
# assignments of "priority" and "message" are not visible here).
# The message is handed to logger(1) and a timestamped copy is appended to
# $DBAGENT_LOG. ${FUNCNAME[2]} is the function two frames up the bash call
# stack — presumably the caller of a log_info/log_error wrapper; confirm.
47 logger $priority "${FUNCNAME[2]} ${message}"
48 echo "$(date) ($priority) ${FUNCNAME[2]} ${message}" >> $DBAGENT_LOG
# Command-runner fragment (NOTE(review): the function header is not visible
# here; presumably this is the body of run_cmd, which other functions call
# with a single command string — confirm).
# All arguments are joined and evaluated as one shell command line; stdout
# and stderr of that command are captured together in "result".
# NOTE(review): eval re-parses the string, so shell metacharacters inside
# interpolated values (node names, uuids) are interpreted by the shell.
66 result=$(eval "$*" 2>&1)
# NOTE(review): "ret" is assigned on a line not visible here — presumably the
# exit status of the eval above; confirm.
68 if [ $ret -ne 0 ]; then
69 log_error "Failed with error $result"
71 log_info "Command succeeded: $result"
# is_db_instance_running: probe the MySQL server on $node with "select 1".
# Callers in this file test for exit status 1 to mean "instance is up".
# NOTE(review): the assignment of "node" and the return statements are not
# visible here — confirm the status convention against the full function.
77 function is_db_instance_running()
# Capture stdout and stderr of the client so the error text can be inspected.
79 output=$(/usr/bin/mysql -h $node -e "select 1" 2>&1)
81 log_info "DB instance in $node is up"
# An "Access denied" reply is looked for in the client output; the log line
# that follows also reports the instance as up (presumably because a server
# that rejects the login is still running — confirm). grep has no -q here,
# so a matching line is also written to stdout.
85 echo $output | grep "Access denied"
87 log_info "DB instance in $node is up"
# is_single_node: decide from dbnodes_count (set by get_db_nodes) whether the
# script runs in a single-node environment.
# NOTE(review): only the "more than one node" test is visible; the return
# values are on lines not shown. The entry points compare a "single_node"
# variable with 1 — presumably this function's exit status; confirm.
94 function is_single_node()
96 log_info "Checking if we are running in single-node environment"
97 if [ $dbnodes_count -gt 1 ]; then
# Lock-acquisition fragment (NOTE(review): the function header and any retry
# loop are not visible here).
# Asks $LOCKCLI for the lock named $LOCKNAME with a timeout of $LOCKTIMEOUT.
106 log_info "Acquiring lock"
108 output=$($LOCKCLI lock --id $LOCKNAME --timeout $LOCKTIMEOUT)
# Exit status 0 from the CLI is treated as "lock obtained".
109 if [ $? -eq 0 ]; then
# Take the second '='-separated field of the output line containing "uuid=".
110 LOCKUUID=$(echo $output | grep "uuid=" | /bin/awk -F= '{print $2}')
113 log_info "Cannot acquire lock, waiting..."
# Lock-release fragment (NOTE(review): the function header is not visible
# here). Reads the uuid back from $LOCKUUID_FILE and passes it, together with
# $LOCKNAME, to "$LOCKCLI unlock" through run_cmd.
120 log_info "Releasing lock"
121 uuid=$(cat $LOCKUUID_FILE)
122 run_cmd "$LOCKCLI unlock --id $LOCKNAME --uuid $uuid"
# set_becoming_master: store $OWNNODE as the value of the $BECOMEMASTERATTR
# attribute in $DOMAIN, then check through is_becoming_master_set whether the
# value is in place.
126 function set_becoming_master()
128 log_info "Setting becoming master"
129 run_cmd "$DSSCLI set --domain $DOMAIN --name $BECOMEMASTERATTR --value $OWNNODE"
# NOTE(review): "ret" is assigned on a line not visible here — presumably the
# exit status of the run_cmd call above; confirm.
133 if [ $ret -eq 0 ]; then
135 log_info "Waiting for become master to be set"
136 is_becoming_master_set
# Exit status 1 from is_becoming_master_set is the case tested for here.
137 if [ $? -eq 1 ]; then
# is_becoming_master_set: read the $BECOMEMASTERATTR attribute of $DOMAIN and
# test whether its value differs from "none".
# NOTE(review): the return statements are not visible here; callers compare
# the exit status with 1 — presumably meaning "set"; confirm.
147 function is_becoming_master_set()
149 log_info "Checking if becoming master is set"
150 value=$(run_cmd "$DSSCLI get --domain $DOMAIN --name $BECOMEMASTERATTR")
# $? here is the exit status of the command substitution above.
151 if [ $? -ne 0 ]; then
# The "z" prefix keeps the test well-formed when $value is empty.
154 if [ "z$value" != "znone" ]; then
# get_becoming_master_node: read the $BECOMEMASTERATTR attribute of $DOMAIN.
# Callers capture this function's stdout and compare it with $OWNNODE.
# NOTE(review): the line that prints the value is not visible here.
160 function get_becoming_master_node()
162 log_info "Getting the node trying to become master"
163 value=$(run_cmd "$DSSCLI get --domain $DOMAIN --name $BECOMEMASTERATTR")
# NOTE(review): "ret" is assigned on a line not visible here — presumably the
# exit status of the lookup above; confirm.
165 if [ $ret -ne 0 ]; then
# unset_becoming_master: reset the $BECOMEMASTERATTR attribute of $DOMAIN to
# the value "none", which is_becoming_master_set compares against.
172 function unset_becoming_master()
174 log_info "Unsetting becoming master"
175 run_cmd "$DSSCLI set --domain $DOMAIN --name $BECOMEMASTERATTR --value none"
# set_wsrep_new_cluster: mark the local state file as safe to bootstrap and
# set the _WSREP_NEW_CLUSTER variable to '--wsrep-new-cluster'.
179 function set_wsrep_new_cluster()
181 log_info "Setting new cluster and safe to bootstrap"
# Rewrite "safe_to_bootstrap: 0" to "safe_to_bootstrap: 1" in place.
182 run_cmd "sed -i 's/^safe_to_bootstrap: 0/safe_to_bootstrap: 1/g' /var/lib/mysql/grastate.dat"
# Set the variable in the systemd manager environment; presumably the
# database unit expands it on its command line — confirm against the unit file.
183 run_cmd "systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster'"
# unset_wsrep_new_cluster: reverse of set_wsrep_new_cluster — flips
# safe_to_bootstrap back to 0 and sets _WSREP_NEW_CLUSTER to an empty value.
186 function unset_wsrep_new_cluster()
188 log_info "Clearing new cluster flag and safe to bootstrap"
# Rewrite "safe_to_bootstrap: 1" to "safe_to_bootstrap: 0" in place.
189 run_cmd "sed -i 's/^safe_to_bootstrap: 1/safe_to_bootstrap: 0/g' /var/lib/mysql/grastate.dat"
# The variable is set to the empty string rather than removed with
# unset-environment.
190 run_cmd "systemctl set-environment _WSREP_NEW_CLUSTER=''"
# set_running: store the value "true" under "${OWNNODE}.running" in $DOMAIN.
194 function set_running()
196 log_info "Setting running flag to true"
197 run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.running --value true"
# unset_running: store the value "false" under "${OWNNODE}.running" in
# $DOMAIN. The attribute is overwritten, not deleted.
200 function unset_running()
202 log_info "Setting running flag to false"
203 run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.running --value false"
# write_state: copy the uuid and seqno found in /var/lib/mysql/grastate.dat
# into $DOMAIN as "${OWNNODE}.uuid" and "${OWNNODE}.seqno".
207 function write_state()
# Second whitespace-separated field of the lines matching "uuid" / "seqno".
# NOTE(review): the grep patterns are unanchored, so any line containing the
# word matches; verify grastate.dat has exactly one such line each.
209 uuid=$(grep uuid /var/lib/mysql/grastate.dat | awk '{print $2}')
210 seqno=$(grep seqno /var/lib/mysql/grastate.dat | awk '{print $2}')
211 run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.uuid --value $uuid"
212 run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.seqno --value $seqno"
# get_node_uuid: look up the "${node}.uuid" attribute in $DOMAIN.
# NOTE(review): the assignment of "node" (presumably from $1) and the line
# that prints the result are not visible here — confirm.
216 function get_node_uuid()
219 log_info "Getting uuid of node $node"
220 uuid=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.uuid")
# NOTE(review): "ret" is assigned on a line not visible here — presumably the
# exit status of the lookup above; confirm.
222 if [ $ret -ne 0 ]; then
# get_node_seqno: look up the "${node}.seqno" attribute in $DOMAIN. The
# start-pre code captures this function's stdout and compares it numerically.
# NOTE(review): the assignment of "node" (presumably from $1) and the line
# that prints the result are not visible here — confirm.
229 function get_node_seqno()
232 log_info "Getting seqno of node $node"
233 seqno=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.seqno")
# NOTE(review): "ret" is assigned on a line not visible here — presumably the
# exit status of the lookup above; confirm.
235 if [ $ret -ne 0 ]; then
# do_others_have_good_seqno: scan every seqno entry of $DOMAIN and report
# whether any of them is greater than 0.
# NOTE(review): the return statements are not visible here; the start-pre
# code tests for exit status 1 — presumably "a valid seqno exists"; confirm.
# NOTE(review): no exclusion of $OWNNODE's own entry is visible in the loop,
# despite the "others" in the name — confirm against the full function.
242 function do_others_have_good_seqno()
245 log_info "Checking if any node have a valid seqno"
# Third whitespace-separated field of each get-domain line containing "seqno".
246 for no in $($DSSCLI get-domain --domain $DOMAIN | grep seqno | awk '{print $3}'); do
# -gt needs an integer; a non-numeric field makes the test itself fail.
247 if [ $no -gt 0 ]; then
248 log_info "Some node have a valid seqno"
252 log_info "No node with valid seqno found"
# get_node_running: look up the "${node}.running" attribute in $DOMAIN and
# test whether it equals "true".
# NOTE(review): the assignment of "node" (presumably from $1) and the return
# statements are not visible here; the stop-post code waits for exit status 0
# and then logs "Own state is updated" — confirm the status convention.
256 function get_node_running()
259 log_info "Getting if $node is running"
260 running=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.running")
# $? here is the exit status of the command substitution above.
261 if [ $? -ne 0 ]; then
262 log_info "command failed with error $running"
265 log_info "Total running $running"
# The "z" prefix keeps the test well-formed when $running is empty.
266 if [ "z$running" == "ztrue" ]; then
# is_any_db_instance_running: walk the "dbnodes" array and probe each node
# with is_db_instance_running.
# NOTE(review): the return statements are not visible here; is_cluster_running
# tests this function for exit status 1 — presumably "some instance is up".
273 function is_any_db_instance_running()
275 log_info "Getting nodes in which the db is running"
277 for node in "${dbnodes[@]}"; do
# The local node is singled out here; presumably it is skipped — the branch
# body is not visible, confirm.
278 if [ "x$node" == "x$OWNNODE" ]; then
282 is_db_instance_running $node
# Exit status 1 from is_db_instance_running is treated as "up".
283 if [ $? -eq 1 ]; then
284 log_info "DB instance in $node is up"
# is_cluster_running: report whether a galera cluster already exists, based
# on the result of is_any_db_instance_running.
# NOTE(review): the return statements are not visible here.
291 function is_cluster_running()
293 log_info "Checking if an existing galera cluster is running"
295 #check if any instance of the db is up and running
296 is_any_db_instance_running
# Exit status 1 from is_any_db_instance_running is the case tested for here.
297 if [ $? -eq 1 ]; then
# wait_cluster_running: wait until "cluster_running" equals 1, then log that
# the cluster is running.
# NOTE(review): the loop construct, any delay between attempts and the
# assignment of "cluster_running" (presumably the exit status of
# is_cluster_running) are not visible here — confirm.
304 function wait_cluster_running()
306 log_info "Waiting for cluster to become running"
311 if [ $cluster_running -eq 1 ]; then
312 log_info "cluster is running"
# start-pre fragment (NOTE(review): the function header and several
# else/fi/return lines are not visible here). Visible steps, in order:
#   1. Log the call and test "single_node"; a value of 1 prints a notice.
#   2. Test "cluster_running"; a value of 1 logs a normal start.
#   3. Read this node's seqno; when it is <= 0, consult
#      do_others_have_good_seqno.
#   4. Test "becoming_master"; a value of 1 logs a back-off. Later lines log
#      that this node becomes master and call set_wsrep_new_cluster.
# NOTE(review): single_node, cluster_running and becoming_master are assigned
# on lines not visible here — presumably from the exit status of
# is_single_node, is_cluster_running and is_becoming_master_set; confirm.
322 log_info "start_pre called"
323 #check for single node case
326 if [ $single_node -eq 1 ]; then
327 echo "Doing nothing as we are running in a single-node environment"
334 if [ $cluster_running -eq 1 ]; then
335 log_info "starting normally as a galera cluster is already running"
339 #check if we have good seqno, if not then we need to wait for the active
340 #as we cannot become master
341 log_info "checking own sequence number"
# get_node_seqno's stdout is captured; the numeric tests below require it to
# be an integer.
342 seqno=$(get_node_seqno $OWNNODE)
343 if [ $seqno -le 0 ]; then
344 #check the seqno of others
345 do_others_have_good_seqno
# Exit status 1 here leads to the "wait for cluster" message below.
346 if [ $? -eq 1 ]; then
347 log_info "bad seqno $seqno we need to wait for cluster to become running"
# Second seqno test: selects between the two log messages that follow.
355 if [ $seqno -le 0 ]; then
356 log_info "no one seems to have a good seqno"
358 log_info "no running galera cluster found and we have good seqno"
361 log_info "check if someone is trying to become master"
362 is_becoming_master_set
364 if [ $becoming_master -eq 1 ]; then
365 log_info "someone is trying to become master, backing off"
372 log_info "no one is trying to become master, let us become master"
374 set_wsrep_new_cluster
# start_post: run after the database has started. Visible steps: test
# "single_node", and when "qm" is 1 clear the become-master attribute if it
# names this node; unset_wsrep_new_cluster is also called.
# NOTE(review): "single_node" and "qm" are assigned on lines not visible
# here — "qm" is presumably the exit status of is_in_quorum; confirm. The log
# message mentions setting the running state, but the call that does so is
# not visible here.
378 function start_post()
380 log_info "start_post setting running state to true"
381 #check for single node case
384 if [ $single_node -eq 1 ]; then
385 echo "Doing nothing as we are running in a single-node environment"
390 if [ $qm -eq 1 ]; then
# Only the node recorded in the attribute clears it.
391 become_master_node=$(get_becoming_master_node)
392 if [ "x$become_master_node" == "x$OWNNODE" ]; then
393 unset_becoming_master
398 unset_wsrep_new_cluster
# stop-post fragment (NOTE(review): the function header is not visible here).
# Visible steps: test "single_node"; when "qm" is 1 clear the become-master
# attribute if it names this node; call unset_wsrep_new_cluster; then, when
# "qm" is 1, poll get_node_running for this node up to 10 times.
# NOTE(review): "single_node" and "qm" are assigned on lines not visible
# here — "qm" is presumably the exit status of is_in_quorum; confirm. The
# call that sets the running state to false and any delay inside the polling
# loop are not visible either.
406 log_info "stop_post setting running state to false"
407 #check for single node case
410 if [ $single_node -eq 1 ]; then
411 echo "Doing nothing as we are running in a single-node environment"
416 if [ $qm -eq 1 ]; then
# Only the node recorded in the attribute clears it.
417 become_master_node=$(get_becoming_master_node)
418 if [ "x$become_master_node" == "x$OWNNODE" ]; then
419 unset_becoming_master
423 unset_wsrep_new_cluster
424 if [ $qm -eq 1 ]; then
# At most 10 polls of this node's running attribute.
427 for ((i=0; i<10; i++)); do
428 log_info "Waiting for own state to become not running"
429 get_node_running $OWNNODE
# Exit status 0 from get_node_running is treated as "no longer running".
430 if [ $? -eq 0 ]; then
431 log_info "Own state is updated"
# clustercheck wait fragment (NOTE(review): the function header, the loop
# construct and any delay between attempts are not visible here).
# Visible steps: test "single_node", then run /usr/local/bin/clustercheck and
# treat exit status 0 as success.
443 log_info "waiting until clustercheck is ok"
446 if [ $single_node -eq 1 ]; then
447 log_info "Doing nothing as we are running in a single-node environment"
452 /usr/local/bin/clustercheck
453 if [ $? -eq 0 ]; then
454 log_info "clustercheck is ok"
# get_states: dump the contents of $DOMAIN and of the "_locks" domain through
# run_cmd, then print whether the nodes have quorum.
461 function get_states()
463 log_info "Getting states"
464 run_cmd "$DSSCLI get-domain --domain $DOMAIN"
465 run_cmd "$DSSCLI get-domain --domain _locks"
# NOTE(review): the command whose status is tested is on a line not visible
# here — presumably is_in_quorum, with exit status 1 meaning quorum; confirm.
467 if [ $? -eq 1 ]; then
468 echo "Nodes have quorum"
470 echo "Nodes don't have quorum"
# is_in_quorum: probe every entry of "dbnodes" with is_db_instance_running
# and compare the number of nodes that are up with the number that are down.
# NOTE(review): the counter updates and return statements are not visible
# here; get_states appears to treat exit status 1 as "have quorum" — confirm.
474 function is_in_quorum()
476 log_info "Checking if peer nodes are running"
# NOTE(review): the domain is hard-coded as "galera" here, while the other
# DSSCLI calls in this file use $DOMAIN — confirm this is intentional.
477 nodes=$($DSSCLI get-domain --domain galera | grep running | awk -F. '{print $1}')
# Without "set -o pipefail" this status is that of awk, the last pipeline
# stage, not of $DSSCLI.
478 if [ $? -ne 0 ]; then
485 for node in "${dbnodes[@]}"; do
487 is_db_instance_running $node
# Exit status 1 from is_db_instance_running is treated as "up".
488 if [ $? -eq 1 ]; then
495 log_info "Total $count, up $up, down $down"
# A count of 1 is handled as its own case before the majority test.
497 if [ $count -eq 1 ]; then
# Strict majority: more nodes up than down.
501 if [ $up -gt $down ]; then
# Hanging-mysqld check fragment (NOTE(review): the function header and the
# action taken when a pid is found are not visible here; presumably this
# backs the "kill-old" command — confirm).
511 log_info "Checking for hanging mysqld services"
# pidof prints the pids of running mysqld processes, or nothing if none run.
512 mysqlpid=$(/usr/sbin/pidof mysqld)
# Empty output means no mysqld process was found.
513 if [ "x$mysqlpid" == "x" ]; then
# Command-line entry point: expects exactly two arguments — the command name
# in $1 and, per the usage text, a comma-separated list of db node names.
# NOTE(review): the calls made in most branches, the get_db_nodes call and
# the exit statements are on lines not visible here.
# NOTE(review): the first usage text below contains the typo "separted" and
# omits the is-any-db-instance-running command that the chain handles.
519 if [ $# -ne 2 ]; then
520 echo "Usage:$0 start-pre|start-post|stop|stop-post|get-states|set-running|kill-old|do-others-have-good-seqno <comma separted list of db node names>"
# NOTE(review): $1 is unquoted in every test of this chain; an argument that
# is empty or contains spaces breaks the [ ... ] expression.
526 if [ $1 == "start-pre" ]; then
528 elif [ $1 == "start-post" ]; then
530 elif [ $1 == "stop" ]; then
532 elif [ $1 == "stop-post" ]; then
534 elif [ $1 == "get-states" ]; then
536 elif [ $1 == "set-running" ]; then
538 elif [ $1 == "kill-old" ]; then
540 elif [ $1 == "do-others-have-good-seqno" ]; then
541 do_others_have_good_seqno
542 elif [ $1 == "is-any-db-instance-running" ]; then
543 is_any_db_instance_running
# NOTE(review): "result" is assigned on a line not visible here — presumably
# the exit status of the call above; confirm.
545 echo "Result is $result"
# Fallback for an unrecognised command name.
547 echo "Invalid option provided"
548 echo "Usage:$0 start-pre|start-post|stop|stop-post|get-states|set-running|kill-old|do-others-have-good-seqno|is-any-db-instance-running"