From 461929b69c0526c3b8c25a11dbbd5d4b301dbac5 Mon Sep 17 00:00:00 2001 From: Baha Mesleh Date: Fri, 22 Mar 2019 11:42:30 +0200 Subject: [PATCH] Initial commit Change-Id: Ib2474e54d28c0d62ed5374e4003af7b79b882ead Signed-off-by: Baha Mesleh --- .gitreview | 5 + LICENSE | 202 ++++++++ .../active-standby-controller.sh | 21 + active-standby-services/active-standby-monitor.sh | 41 ++ active-standby-services/config-manager.service | 1 + active-standby-services/rediscontroller.service | 1 + monitoring.spec | 67 +++ src/become-redis-master.sh | 31 ++ src/become-redis-slave.sh | 24 + src/dbwatchdog-node-shutdown-handler.sh | 29 ++ src/dbwatchdog.sh | 550 +++++++++++++++++++++ src/keepalivedmonitor.py | 58 +++ src/monitoring_can_become_master.sh | 24 + src/monitoring_common.sh | 42 ++ src/monitoring_quorum_down.sh | 24 + src/monitoring_quorum_up.sh | 24 + src/monitoring_state_changed.sh | 27 + src/recover-db-files.sh | 108 ++++ src/rediscontroller.sh | 21 + src/redismonitor.sh | 38 ++ 20 files changed, 1338 insertions(+) create mode 100644 .gitreview create mode 100644 LICENSE create mode 100755 active-standby-services/active-standby-controller.sh create mode 100755 active-standby-services/active-standby-monitor.sh create mode 100644 active-standby-services/config-manager.service create mode 100644 active-standby-services/rediscontroller.service create mode 100644 monitoring.spec create mode 100755 src/become-redis-master.sh create mode 100755 src/become-redis-slave.sh create mode 100755 src/dbwatchdog-node-shutdown-handler.sh create mode 100755 src/dbwatchdog.sh create mode 100755 src/keepalivedmonitor.py create mode 100755 src/monitoring_can_become_master.sh create mode 100755 src/monitoring_common.sh create mode 100755 src/monitoring_quorum_down.sh create mode 100755 src/monitoring_quorum_up.sh create mode 100755 src/monitoring_state_changed.sh create mode 100755 src/recover-db-files.sh create mode 100755 src/rediscontroller.sh create mode 100755 src/redismonitor.sh diff 
--git a/.gitreview b/.gitreview new file mode 100644 index 0000000..a2e598c --- /dev/null +++ b/.gitreview @@ -0,0 +1,5 @@ +[gerrit] +host=gerrit.akraino.org +port=29418 +project=ta/monitoring +defaultremote=origin diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/active-standby-services/active-standby-controller.sh b/active-standby-services/active-standby-controller.sh
new file mode 100755
index 0000000..a5f902f
--- /dev/null
+++ b/active-standby-services/active-standby-controller.sh
@@ -0,0 +1,23 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# $1 is the state this script was called with; it is only reported, the
+# action taken below does not depend on it.
+new_state=$1
+printf '%s called with state=%s\n' "$0" "$new_state"
+
+# Restart the monitor service unconditionally.
+/usr/bin/systemctl restart active-standby-monitor.service
diff --git a/active-standby-services/active-standby-monitor.sh b/active-standby-services/active-standby-monitor.sh
new file mode 100755
index 0000000..33e0429
--- /dev/null
+++ b/active-standby-services/active-standby-monitor.sh
@@ -0,0 +1,41 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "active-cold-standby started"
+
+SERVICESDIR=/etc/monitoring/active-standby-services/
+VIP=$1
+[ -n "$VIP" ] || echo "warning: no VIP given, services are kept stopped" >&2
+
+# Every 10s: run the services named in SERVICESDIR only while this node holds VIP.
+while true; do
+    # -F/-w: literal whole-address match, so 10.0.0.1 never matches 10.0.0.10.
+    if [ -n "$VIP" ] && /usr/sbin/ip -4 a | grep -qwF -- "$VIP"; then
+        action=start verb=starting
+    else
+        action=stop verb=stopping
+    fi
+    for path in "$SERVICESDIR"*; do
+        [ -e "$path" ] || continue      # empty directory: the glob matched nothing
+        service=${path##*/}
+        if /bin/systemctl is-active --quiet "$service"; then state=start; else state=stop; fi
+        if [ "$state" != "$action" ]; then
+            echo "monitoring $verb $service"
+            systemctl "$action" --no-block "$service"
+        fi
+    done
+    sleep 10
+done
diff --git a/active-standby-services/config-manager.service b/active-standby-services/config-manager.service
new file mode 100644
index 0000000..792d600
--- /dev/null
+++ b/active-standby-services/config-manager.service
@@ -0,0 +1 @@
+#
diff --git a/active-standby-services/rediscontroller.service b/active-standby-services/rediscontroller.service
new file mode 100644
index 0000000..792d600
--- /dev/null
+++ b/active-standby-services/rediscontroller.service
@@ -0,0 +1 @@
+#
diff --git a/monitoring.spec b/monitoring.spec
new file mode 100644
index 0000000..257d575
--- /dev/null
+++ b/monitoring.spec
@@ -0,0 +1,67 @@
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Name:       monitoring
+Version:    %{_version}
+Release:    1%{?dist}
+Summary:    keepalived based node monitor and vip management
+License:    %{_platform_licence}
+Source0:    %{name}-%{version}.tar.gz
+Vendor:     %{_platform_vendor}
+BuildArch:  noarch
+Requires: keepalived
+BuildRequires: python
+BuildRequires: python-setuptools
+
+%description
+This RPM contains code for the keepalived based monitoring
+
+%prep
+%autosetup
+
+%build
+
+%install
+mkdir -p %{buildroot}/opt/monitoring/
+cp src/*.sh %{buildroot}/opt/monitoring/
+cp src/*.py %{buildroot}/opt/monitoring/
+
+mkdir -p %{buildroot}/etc/monitoring/quorum-state-changed-actions
+mkdir -p %{buildroot}/etc/monitoring/node-state-changed-actions
+
+mkdir -p %{buildroot}/etc/monitoring/active-standby-services
+cp active-standby-services/*.service %{buildroot}/etc/monitoring/active-standby-services/
+
+cp active-standby-services/active-standby-controller.sh %{buildroot}/etc/monitoring/node-state-changed-actions/
+cp active-standby-services/active-standby-monitor.sh %{buildroot}/opt/monitoring/
+
+%files
+/opt/monitoring/*
+%dir /etc/monitoring/quorum-state-changed-actions
+%dir /etc/monitoring/node-state-changed-actions
+/etc/monitoring/node-state-changed-actions/*
+/etc/monitoring/active-standby-services/*
+# The two action directories are owned as directories only; contents are listed on their own.
+%pre
+
+%post
+echo "monitoring successfully installed"
+
+
+%preun
+
+%postun
+
+%clean
+rm -rf %{buildroot}
diff --git a/src/become-redis-master.sh b/src/become-redis-master.sh
new file mode 100755
index 0000000..342137c
--- /dev/null
+++ b/src/become-redis-master.sh
@@ -0,0 +1,31 @@
+#!
/bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Becoming master..."
+
+filename=/etc/redis.conf
+# Pass -a only when a password is configured; an empty -a would swallow "slaveof".
+passwd=$(awk '/^requirepass/ {print $2}' "$filename")
+auth=(); [ -n "$passwd" ] && auth=(-a "$passwd")
+
+# Up to 5 attempts, one second apart, until redis-cli reports success.
+for ((i=0; i<5; i++)); do
+    redis-cli "${auth[@]}" slaveof no one && break
+    sleep 1
+done
+
+# The exit status is always 0, whether or not the command succeeded.
+exit 0
diff --git a/src/become-redis-slave.sh b/src/become-redis-slave.sh
new file mode 100755
index 0000000..b1e2f7e
--- /dev/null
+++ b/src/become-redis-slave.sh
@@ -0,0 +1,28 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Becoming slave of $1"
+
+filename=/etc/redis.conf
+# Pass -a only when a password is configured; an empty -a would swallow "slaveof".
+passwd=$(awk '/^requirepass/ {print $2}' "$filename")
+auth=(); [ -n "$passwd" ] && auth=(-a "$passwd")
+
+# Replicate from the host given as $1, port 6379.
+redis-cli "${auth[@]}" slaveof "$1" 6379
+
+# The exit status is always 0, whether or not the command succeeded.
+exit 0
diff --git a/src/dbwatchdog-node-shutdown-handler.sh b/src/dbwatchdog-node-shutdown-handler.sh
new file mode 100755
index 0000000..1c78cfe
--- /dev/null
+++ b/src/dbwatchdog-node-shutdown-handler.sh
@@ -0,0 +1,29 @@
+#!
/bin/bash + +# Copyright 2019 Nokia + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +echo "Running $0" + +for ((i=0; i<50; i++)); do + echo "Waiting for mariadb to become inactive" + state=$(/usr/bin/systemctl is-active mariadb) + if [ "x$state" == "xinactive" -o "x$state" == "xfailed" ]; then + echo "mariadb is completely stopped" + break + fi + sleep 1 +done + +exit 0 diff --git a/src/dbwatchdog.sh b/src/dbwatchdog.sh new file mode 100755 index 0000000..d34dbc1 --- /dev/null +++ b/src/dbwatchdog.sh @@ -0,0 +1,550 @@ +#!/bin/bash + +# Copyright 2019 Nokia + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +DBAGENT_LOG=/var/log/dbwatchdog.log +DOMAIN=galera +OWNNODE=$(hostname) +DSSCLI=/usr/local/bin/dsscli +BECOMEMASTERATTR=become-master +LOCKNAME=galera +LOCKTIMEOUT=60 +LOCKCLI=/usr/local/bin/lockcli +LOCKNAME=galera +LOCKHOLDER=$OWNNODE +LOCKUUID=0 +LOCKUUID_FILE=/var/run/.$DOMAIN.lock.uuid + +declare -a dbnodes +dbnodes_count=1 + +function get_db_nodes() +{ + IFS=',' read -a dbnodes <<< $1 + dbnodes_count=${#dbnodes[@]} +} + + + +function log() +{ + local priority=$1 + shift + local message=$1 + + logger $priority "${FUNCNAME[2]} ${message}" + echo "$(date) ($priority) ${FUNCNAME[2]} ${message}" >> $DBAGENT_LOG +} + +function log_info() +{ + log info "$@" +} + +function log_error() +{ + log error "$@" +} + +function run_cmd() +{ + local result + local ret + log_info "Running $*" + result=$(eval "$*" 2>&1) + ret=$? + if [ $ret -ne 0 ]; then + log_error "Failed with error $result" + else + log_info "Command succeeded: $result" + fi + echo "$result" + return $ret +} + +function is_db_instance_running() +{ + output=$(/usr/bin/mysql -h $node -e "select 1" 2>&1) + if [ $? -eq 0 ]; then + log_info "DB instance in $node is up" + return 1 + fi + + echo $output | grep "Access denied" + if [ $? -eq 0 ]; then + log_info "DB instance in $node is up" + return 1 + fi + + return 0 +} + +function is_single_node() +{ + log_info "Checking if we are running in single-node environment" + if [ $dbnodes_count -gt 1 ]; then + return 0 + fi + + return 1 +} + +function lock() +{ + log_info "Acquiring lock" + while [ 1 ]; do + output=$($LOCKCLI lock --id $LOCKNAME --timeout $LOCKTIMEOUT) + if [ $? -eq 0 ]; then + LOCKUUID=$(echo $output | grep "uuid=" | /bin/awk -F= '{print $2}') + break + fi + log_info "Cannot acquire lock, waiting..." 
+ sleep 5 + done +} + +function unlock() +{ + log_info "Releasing lock" + uuid=$(cat $LOCKUUID_FILE) + run_cmd "$LOCKCLI unlock --id $LOCKNAME --uuid $uuid" + return 0 +} + +function set_becoming_master() +{ + log_info "Setting becoming master" + run_cmd "$DSSCLI set --domain $DOMAIN --name $BECOMEMASTERATTR --value $OWNNODE" + + ret=$? + + if [ $ret -eq 0 ]; then + while [ 1 ]; do + log_info "Waiting for become master to be set" + is_becoming_master_set + if [ $? -eq 1 ]; then + break + fi + sleep 1 + done + fi + + return $ret +} + +function is_becoming_master_set() +{ + log_info "Checking if becoming master is set" + value=$(run_cmd "$DSSCLI get --domain $DOMAIN --name $BECOMEMASTERATTR") + if [ $? -ne 0 ]; then + value=none + fi + if [ "z$value" != "znone" ]; then + return 1 + fi + return 0 +} + +function get_becoming_master_node() +{ + log_info "Getting the node trying to become master" + value=$(run_cmd "$DSSCLI get --domain $DOMAIN --name $BECOMEMASTERATTR") + ret=$? + if [ $ret -ne 0 ]; then + value=none + fi + echo $value + return $ret +} + +function unset_becoming_master() +{ + log_info "Unsetting becoming master" + run_cmd "$DSSCLI set --domain $DOMAIN --name $BECOMEMASTERATTR --value none" +} + + +function set_wsrep_new_cluster() +{ + log_info "Setting new cluster and safe to bootstrap" + run_cmd "sed -i 's/^safe_to_bootstrap: 0/safe_to_bootstrap: 1/g' /var/lib/mysql/grastate.dat" + run_cmd "systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster'" +} + +function unset_wsrep_new_cluster() +{ + log_info "Clearing new cluster flag and safe to bootstrap" + run_cmd "sed -i 's/^safe_to_bootstrap: 1/safe_to_bootstrap: 0/g' /var/lib/mysql/grastate.dat" + run_cmd "systemctl set-environment _WSREP_NEW_CLUSTER=''" +} + +### own attributes +function set_running() +{ + log_info "Setting running flag to true" + run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.running --value true" +} + +function unset_running() +{ + log_info "Setting running flag to 
false" + run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.running --value false" + +} + +function write_state() +{ + uuid=$(grep uuid /var/lib/mysql/grastate.dat | awk '{print $2}') + seqno=$(grep seqno /var/lib/mysql/grastate.dat | awk '{print $2}') + run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.uuid --value $uuid" + run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.seqno --value $seqno" +} + +### query functions +function get_node_uuid() +{ + node=$1 + log_info "Getting uuid of node $node" + uuid=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.uuid") + ret=$? + if [ $ret -ne 0 ]; then + uuid=0 + fi + echo $uuid + return $ret +} + +function get_node_seqno() +{ + node=$1 + log_info "Getting seqno of node $node" + seqno=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.seqno") + ret=$? + if [ $ret -ne 0 ]; then + seqno=-1 + fi + echo $seqno + return $ret +} + +function do_others_have_good_seqno() +{ + node=$1 + log_info "Checking if any node have a valid seqno" + for no in $($DSSCLI get-domain --domain $DOMAIN | grep seqno | awk '{print $3}'); do + if [ $no -gt 0 ]; then + log_info "Some node have a valid seqno" + return 1 + fi + done + log_info "No node with valid seqno found" + return 0 +} + +function get_node_running() +{ + node=$1 + log_info "Getting if $node is running" + running=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.running") + if [ $? -ne 0 ]; then + log_info "command failed with error $running" + running='false' + fi + log_info "Total running $running" + if [ "z$running" == "ztrue" ]; then + return 1 + fi + return 0 +} + + +function is_any_db_instance_running() +{ + log_info "Getting nodes in which the db is running" + total_initializing=0 + for node in "${dbnodes[@]}"; do + if [ "x$node" == "x$OWNNODE" ]; then + continue + fi + + is_db_instance_running $node + if [ $? 
-eq 1 ]; then + log_info "DB instance in $node is up" + return 1 + fi + done + + return 0 +} +function is_cluster_running() +{ + log_info "Checking if an existing galera cluster is running" + + #check if any instance of the db is up and running + is_any_db_instance_running + if [ $? -eq 1 ]; then + return 1 + fi + + return 0 +} + +function wait_cluster_running() +{ + log_info "Waiting for cluster to become running" + while [ 1 ]; do + lock + is_cluster_running + cluster_running=$? + if [ $cluster_running -eq 1 ]; then + log_info "cluster is running" + unlock + return 0 + fi + unlock + sleep 5 + done +} +function start_pre() +{ + log_info "start_pre called" + #check for single node case + is_single_node + single_node=$? + if [ $single_node -eq 1 ]; then + echo "Doing nothing as we are running in a single-node environment" + return 0 + fi + #acquire lock + lock + is_cluster_running + cluster_running=$? + if [ $cluster_running -eq 1 ]; then + log_info "starting normally as a galera cluster is already running" + return 0 + fi + + #check if we have good seqno, if not then we need to wait for the active + #as we cannot become master + log_info "checking own sequence number" + seqno=$(get_node_seqno $OWNNODE) + if [ $seqno -le 0 ]; then + #check the seqno of others + do_others_have_good_seqno + if [ $? -eq 1 ]; then + log_info "bad seqno $seqno we need to wait for cluster to become running" + unlock + wait_cluster_running + lock + return 0 + fi + fi + + if [ $seqno -le 0 ]; then + log_info "no one seems to have a good seqno" + else + log_info "no running galera cluster found and we have good seqno" + fi + + log_info "check if someone is trying to become master" + is_becoming_master_set + becoming_master=$? 
+ if [ $becoming_master -eq 1 ]; then + log_info "someone is trying to become master, backing off" + unlock + wait_cluster_running + lock + return 0 + fi + + log_info "no one is trying to become master, let us become master" + set_becoming_master + set_wsrep_new_cluster + return 0 +} + +function start_post() +{ + log_info "start_post setting running state to true" + #check for single node case + is_single_node + single_node=$? + if [ $single_node -eq 1 ]; then + echo "Doing nothing as we are running in a single-node environment" + return 0 + fi + is_in_quorum + qm=$? + if [ $qm -eq 1 ]; then + become_master_node=$(get_becoming_master_node) + if [ "x$become_master_node" == "x$OWNNODE" ]; then + unset_becoming_master + fi + fi + + set_running + unset_wsrep_new_cluster + unlock + + return 0 +} + +function stop_post() +{ + log_info "stop_post setting running state to false" + #check for single node case + is_single_node + single_node=$? + if [ $single_node -eq 1 ]; then + echo "Doing nothing as we are running in a single-node environment" + return 0 + fi + is_in_quorum + qm=$? + if [ $qm -eq 1 ]; then + become_master_node=$(get_becoming_master_node) + if [ "x$become_master_node" == "x$OWNNODE" ]; then + unset_becoming_master + fi + fi + + unset_wsrep_new_cluster + if [ $qm -eq 1 ]; then + write_state + unset_running + for ((i=0; i<10; i++)); do + log_info "Waiting for own state to become not running" + get_node_running $OWNNODE + if [ $? -eq 0 ]; then + log_info "Own state is updated" + break + fi + sleep 2 + done + fi + unlock + return 0 +} + +function stop() +{ + log_info "waiting until clustercheck is ok" + is_single_node + single_node=$? + if [ $single_node -eq 1 ]; then + log_info "Doing nothing as we are running in a single-node environment" + return 0 + fi + + while true; do + /usr/local/bin/clustercheck + if [ $? 
-eq 0 ]; then + log_info "clustercheck is ok" + break + fi + sleep 2 + done +} + +function get_states() +{ + log_info "Getting states" + run_cmd "$DSSCLI get-domain --domain $DOMAIN" + run_cmd "$DSSCLI get-domain --domain _locks" + is_in_quorum + if [ $? -eq 1 ]; then + echo "Nodes have quorum" + else + echo "Nodes don't have quorum" + fi +} + +function is_in_quorum() +{ + log_info "Checking if peer nodes are running" + nodes=$($DSSCLI get-domain --domain galera | grep running | awk -F. '{print $1}') + if [ $? -ne 0 ]; then + return 0 + fi + + count=0 + down=0 + up=0 + for node in "${dbnodes[@]}"; do + let count=$count+1 + is_db_instance_running $node + if [ $? -eq 1 ]; then + let up=$up+1 + else + let down=$down+1 + fi + done + + log_info "Total $count, up $up, down $down" + + if [ $count -eq 1 ]; then + return 1 + fi + + if [ $up -gt $down ]; then + return 1 + fi + + return 0 +} + + +function kill_old() +{ + log_info "Checking for hanging mysqld services" + mysqlpid=$(/usr/sbin/pidof mysqld) + if [ "x$mysqlpid" == "x" ]; then + return + fi + kill -9 $mysqlpid +} + +if [ $# -ne 2 ]; then + echo "Usage:$0 start-pre|start-post|stop|stop-post|get-states|set-running|kill-old|do-others-have-good-seqno " + exit 1 +fi + +get_db_nodes $2 + +if [ $1 == "start-pre" ]; then + start_pre +elif [ $1 == "start-post" ]; then + start_post +elif [ $1 == "stop" ]; then + stop +elif [ $1 == "stop-post" ]; then + stop_post +elif [ $1 == "get-states" ]; then + get_states +elif [ $1 == "set-running" ]; then + set_running +elif [ $1 == "kill-old" ]; then + kill_old +elif [ $1 == "do-others-have-good-seqno" ]; then + do_others_have_good_seqno +elif [ $1 == "is-any-db-instance-running" ]; then + is_any_db_instance_running + result=$? 
+    echo "Result is $result"
+else
+    echo "Invalid option provided"
+    echo "Usage:$0 start-pre|start-post|stop|stop-post|get-states|set-running|kill-old|do-others-have-good-seqno|is-any-db-instance-running"
+    exit 1
+fi
diff --git a/src/keepalivedmonitor.py b/src/keepalivedmonitor.py
new file mode 100755
index 0000000..92e7ad4
--- /dev/null
+++ b/src/keepalivedmonitor.py
@@ -0,0 +1,69 @@
+#! /usr/bin/python
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Minimal TCP listener: accepts connections on the given port of this host's
+# address and discards whatever the clients send, until it is interrupted.
+# Usage: keepalivedmonitor.py <port>
+
+import socket
+import select
+import os
+import errno
+import sys
+
+if __name__ == '__main__':
+    host = socket.gethostname()
+    ip = socket.gethostbyname(host)
+    port = int(sys.argv[1])
+
+    print("Starting listening to port %d" % port)
+
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    s.setblocking(0)
+    s.bind((ip, port))
+    s.listen(1)
+    # Sockets watched by select(): the listening socket plus every client.
+    inputs = [s]
+    while True:
+        try:
+            readable, _, _ = select.select(inputs, [], [])
+            for f in readable:
+                if f is s:
+                    client, address = s.accept()
+                    client.setblocking(0)
+                    inputs.append(client)
+                    #print("Accepted connection from %r, total inputs %d" % (address, len(inputs)))
+                else:
+                    # recv() requires a buffer size; without one it raises
+                    # TypeError on every call. The received data is discarded.
+                    try:
+                        result = f.recv(4096)
+                    except socket.error:
+                        result = b''
+                    # An empty read means the peer closed (or the read failed):
+                    # stop watching the socket and release its descriptor.
+                    if not result:
+                        inputs.remove(f)
+                        f.close()
+        except (SystemExit, KeyboardInterrupt):
+            break
+        except select.error as ex:
+            # An interrupted select() ends the loop; any other select error is ignored.
+            if ex.args[0] == errno.EINTR:
+                break
+
+    print("Stopping...")
diff --git a/src/monitoring_can_become_master.sh
b/src/monitoring_can_become_master.sh
new file mode 100755
index 0000000..8cfdb4f
--- /dev/null
+++ b/src/monitoring_can_become_master.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+# Exit 0 unless the stop file exists; when it does, log that and exit 1.
+STOP_FILE=/run/.monitoring_do_not_become_master
+[ -f $STOP_FILE ] || exit 0
+
+log info "$0 giveup the master role"
+exit 1
diff --git a/src/monitoring_common.sh b/src/monitoring_common.sh
new file mode 100755
index 0000000..91c6637
--- /dev/null
+++ b/src/monitoring_common.sh
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+KEEPALIVED_LOG=/var/log/monitoring.log
+ID=monitoring
+
+QUORUM_ACTIONS_DIR=/etc/monitoring/quorum-state-changed-actions
+NODE_STATE_ACTIONS_DIR=/etc/monitoring/node-state-changed-actions
+
+# log PRIORITY MESSAGE: send MESSAGE to syslog at PRIORITY and append it to $KEEPALIVED_LOG.
+function log()
+{
+    local priority=$1 message=$2
+    logger -p "$priority" "${FUNCNAME[2]} ${message}"  # without -p the priority word is logged as text
+    echo "$(date) ($priority) $ID ${FUNCNAME[2]} ${message}" >> "$KEEPALIVED_LOG"
+}
+
+# execute_actions DIR [ARGS...]: run every DIR/*.sh through bash with ARGS and log each exit status.
+function execute_actions()
+{
+    local dir=$1 file
+    shift
+    for file in "$dir"/*.sh; do
+        [ -e "$file" ] || continue  # an unmatched glob stays literal; nothing to run
+        log info "Running $file"
+        bash "$file" "$@"
+        log info "Result $?"
+    done
+}
diff --git a/src/monitoring_quorum_down.sh b/src/monitoring_quorum_down.sh
new file mode 100755
index 0000000..6a7f001
--- /dev/null
+++ b/src/monitoring_quorum_down.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+log info "$0 called"
+
+execute_actions $QUORUM_ACTIONS_DIR DOWN
+
+exit 0
+
diff --git a/src/monitoring_quorum_up.sh b/src/monitoring_quorum_up.sh
new file mode 100755
index 0000000..6b70c2a
--- /dev/null
+++ b/src/monitoring_quorum_up.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+log info "$0 called"
+
+execute_actions $QUORUM_ACTIONS_DIR UP  # pass "UP" to execute_actions for the quorum actions directory
+
+exit 0
+
diff --git a/src/monitoring_state_changed.sh b/src/monitoring_state_changed.sh
new file mode 100755
index 0000000..232b5aa
--- /dev/null
+++ b/src/monitoring_state_changed.sh
@@ -0,0 +1,27 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+TYPE=$1   # first argument; only appears in the log line below
+NAME=$2   # second argument; only appears in the log line below
+STATE=$3  # third argument; forwarded to execute_actions
+
+log info "$0 called with $*"
+
+execute_actions $NODE_STATE_ACTIONS_DIR $STATE  # pass the new state to execute_actions for the node-state actions directory
+
+exit 0
diff --git a/src/recover-db-files.sh b/src/recover-db-files.sh
new file mode 100755
index 0000000..856d76f
--- /dev/null
+++ b/src/recover-db-files.sh
@@ -0,0 +1,108 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ $# -ne 2 ]; then
+    echo "Usage:$0 <node> <backup-dir>"
+    exit 1
+fi
+
+node_arg=$1
+backup_dir=$2
+node=$(hostname)
+
+# Host names are strings: -ne is an integer test and errors out on them,
+# which silently skipped this safety check.
+if [ "$node_arg" != "$node" ]; then
+    echo "You need to run the script from the same node where the corrupted db is"
+    exit 1
+fi
+
+echo "Creating backup directory $backup_dir"
+if ! mkdir -p "$backup_dir"; then
+    echo "Failed to create $backup_dir"
+    exit 1
+fi
+
+echo "Locking db service"
+if ! /opt/nokia/bin/hascli -l -o "/$node/mariadb/mariadb"; then
+    echo "Failed to lock /$node/mariadb/mariadb"
+    exit 1
+fi
+
+# The original files are removed next, so abort unless the copy succeeded.
+echo "Copying existing db files"
+if ! cp -r /var/lib/mysql "$backup_dir"; then
+    echo "Failed to copy /var/lib/mysql to $backup_dir"
+    exit 1
+fi
+
+echo "Removing old db files"
+rm -rf /var/lib/mysql
+
+echo "Recreating db directory"
+mkdir /var/lib/mysql
+chown mysql:mysql /var/lib/mysql
+chmod 2755 /var/lib/mysql
+
+echo "Installing the db"
+if ! /usr/bin/mysql_install_db --datadir=/var/lib/mysql --user=mysql; then
+    echo "db installation failed"
+    exit 1
+fi
+chown -R mysql:mysql /var/lib/mysql/
+/usr/sbin/restorecon -R /var/lib/mysql
+
+echo "Starting db in safe mode"
+/usr/bin/mysqld_safe --wsrep-provider=none &
+safe_pid=$!  # $? is always 0 after "cmd &"; liveness is checked in the wait loop
+
+echo "Waiting for db to become up"
+while [ 1 ]; do
+    /bin/mysqladmin -h localhost -u root --password= ping | grep "mysqld is alive"
+    if [ $? -eq 0 ]; then
+        echo "DB is now up"
+        break
+    fi
+    # Stop waiting once the background mysqld_safe process has exited.
+    if ! kill -0 "$safe_pid" 2>/dev/null; then
+        echo "Failed to start db in safe mode"
+        exit 1
+    fi
+    echo "DB is not yet up, waiting..."
+    sleep 2
+done
+
+echo "Fix the passwords/grants"
+root_password=$(sudo grep password /root/.my.cnf | cut -d'=' -f2)
+rc=0
+# Feed the statements on stdin instead of staging them in /tmp/restore.sql,
+# so the root password is never left behind in a predictable temp file.
+mysql -h localhost -u root --password= <<EOF
+grant all on *.* to root@localhost identified by "$root_password";
+set password for 'root'@'localhost' = password("$root_password");
+EOF
+if [ $? -ne 0 ]; then
+    echo "Failed to fix grants"
+    rc=1
+fi
+
+echo "Shutting down the db"
+if ! /usr/bin/mysqladmin -h localhost -u root shutdown; then
+    echo "Failed to shutdown the db"
+    rc=1
+fi
+
+# The db service is unlocked again only when every step above succeeded.
+if [ $rc -eq 0 ]; then
+    echo "DB files recovered successfully, starting db"
+    /opt/nokia/bin/hascli -u -o "/$node/mariadb/mariadb"
+fi
+
+exit $rc
diff --git a/src/rediscontroller.sh b/src/rediscontroller.sh
new file mode 100755
index 0000000..34a4950
--- /dev/null
+++ b/src/rediscontroller.sh
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "redis controller started"
+
+while [ 1 ]; do
+    sleep 600
+done
diff --git a/src/redismonitor.sh b/src/redismonitor.sh
new file mode 100755
index 0000000..bef9b49
--- /dev/null
+++ b/src/redismonitor.sh
@@ -0,0 +1,38 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "redis monitor started"
+
+# Second field of the "requirepass" line in the redis configuration file.
+filename=/etc/redis.conf
+passwd=$(grep -E -e "^requirepass" "$filename" | awk '{print $2}')
+
+# Every 10 seconds compare the local redis role with the state of the
+# rediscontroller service and run the matching become-redis-* script when
+# they disagree.
+while [ 1 ]; do
+    # master is 0 when redis reports role:master. $passwd is quoted so that an
+    # empty or glob-like value cannot shift "info" into the password slot.
+    redis-cli -a "$passwd" info | grep "role:master" 2> /dev/null 1>&2
+    master=$?
+
+    # active is 0 when "systemctl status rediscontroller" succeeds
+    systemctl status rediscontroller 2> /dev/null 1>&2
+    active=$?
+
+    if [ "$active" -eq 0 ] && [ "$master" -ne 0 ]; then
+        echo "Changing redis db to master"
+        /opt/monitoring/become-redis-master.sh
+    elif [ "$active" -ne 0 ] && [ "$master" -eq 0 ]; then
+        echo "Changing redis db to slave"
+        # the first script argument is forwarded unchanged
+        /opt/monitoring/become-redis-slave.sh $1
+    fi
+
+    sleep 10
+done
-- 
2.16.6