Initial commit 62/662/1
authorBaha Mesleh <baha.mesleh@nokia.com>
Fri, 22 Mar 2019 09:42:30 +0000 (11:42 +0200)
committerBaha Mesleh <baha.mesleh@nokia.com>
Thu, 9 May 2019 09:26:00 +0000 (12:26 +0300)
Change-Id: Ib2474e54d28c0d62ed5374e4003af7b79b882ead
Signed-off-by: Baha Mesleh <baha.mesleh@nokia.com>
20 files changed:
.gitreview [new file with mode: 0644]
LICENSE [new file with mode: 0644]
active-standby-services/active-standby-controller.sh [new file with mode: 0755]
active-standby-services/active-standby-monitor.sh [new file with mode: 0755]
active-standby-services/config-manager.service [new file with mode: 0644]
active-standby-services/rediscontroller.service [new file with mode: 0644]
monitoring.spec [new file with mode: 0644]
src/become-redis-master.sh [new file with mode: 0755]
src/become-redis-slave.sh [new file with mode: 0755]
src/dbwatchdog-node-shutdown-handler.sh [new file with mode: 0755]
src/dbwatchdog.sh [new file with mode: 0755]
src/keepalivedmonitor.py [new file with mode: 0755]
src/monitoring_can_become_master.sh [new file with mode: 0755]
src/monitoring_common.sh [new file with mode: 0755]
src/monitoring_quorum_down.sh [new file with mode: 0755]
src/monitoring_quorum_up.sh [new file with mode: 0755]
src/monitoring_state_changed.sh [new file with mode: 0755]
src/recover-db-files.sh [new file with mode: 0755]
src/rediscontroller.sh [new file with mode: 0755]
src/redismonitor.sh [new file with mode: 0755]

diff --git a/.gitreview b/.gitreview
new file mode 100644 (file)
index 0000000..a2e598c
--- /dev/null
@@ -0,0 +1,5 @@
+[gerrit]
+host=gerrit.akraino.org
+port=29418
+project=ta/monitoring
+defaultremote=origin
diff --git a/LICENSE b/LICENSE
new file mode 100644 (file)
index 0000000..d645695
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/active-standby-services/active-standby-controller.sh b/active-standby-services/active-standby-controller.sh
new file mode 100755 (executable)
index 0000000..a5f902f
--- /dev/null
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Node state handed over by the caller as the first argument; it is only
+# reported, the monitor service is restarted regardless of its value.
+STATE=$1
+
+printf '%s called with state=%s\n' "$0" "$STATE"
+
+# Restart the monitor service after the state change notification.
+/usr/bin/systemctl restart active-standby-monitor.service
diff --git a/active-standby-services/active-standby-monitor.sh b/active-standby-services/active-standby-monitor.sh
new file mode 100755 (executable)
index 0000000..33e0429
--- /dev/null
@@ -0,0 +1,41 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "active-cold-standby started"
+
+SERVICESDIR=/etc/monitoring/active-standby-services/
+VIP=$1
+while [ 1 ]; do
+    allocated=$(/usr/sbin/ip -4 a | grep $VIP | wc -l)
+    if [ $allocated -gt 0 ]; then
+        for service in $(ls $SERVICESDIR); do
+            /bin/systemctl is-active --quiet $service
+            if [ $? -ne 0 ]; then
+                echo "monitoring starting $service"
+                systemctl start --no-block $service
+            fi
+        done
+    else
+        for service in $(ls $SERVICESDIR); do
+            /bin/systemctl is-active --quiet $service
+            if [ $? -eq 0 ]; then
+                echo "monitoring stopping $service"
+                systemctl stop --no-block $service
+            fi
+        done
+    fi
+    sleep 10
+done
diff --git a/active-standby-services/config-manager.service b/active-standby-services/config-manager.service
new file mode 100644 (file)
index 0000000..792d600
--- /dev/null
@@ -0,0 +1 @@
+#
diff --git a/active-standby-services/rediscontroller.service b/active-standby-services/rediscontroller.service
new file mode 100644 (file)
index 0000000..792d600
--- /dev/null
@@ -0,0 +1 @@
+#
diff --git a/monitoring.spec b/monitoring.spec
new file mode 100644 (file)
index 0000000..257d575
--- /dev/null
@@ -0,0 +1,67 @@
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+Name:       monitoring
+Version:    %{_version}
+Release:    1%{?dist}
+Summary:    keepalived based node monitor and vip management
+License:    %{_platform_licence}
+Source0:    %{name}-%{version}.tar.gz
+Vendor:     %{_platform_vendor}
+BuildArch:  noarch
+Requires: keepalived
+BuildRequires: python
+BuildRequires: python-setuptools
+
+%description
+This RPM contains code for the keepalived based monitoring
+
+%prep
+%autosetup
+
+%build
+
+%install
+mkdir -p %{buildroot}/opt/monitoring/
+cp src/*.sh %{buildroot}/opt/monitoring/
+cp src/*.py %{buildroot}/opt/monitoring/
+
+mkdir -p %{buildroot}/etc/monitoring/quorum-state-changed-actions
+mkdir -p %{buildroot}/etc/monitoring/node-state-changed-actions
+
+mkdir -p %{buildroot}/etc/monitoring/active-standby-services
+cp active-standby-services/*.service %{buildroot}/etc/monitoring/active-standby-services/
+
+cp active-standby-services/active-standby-controller.sh %{buildroot}/etc/monitoring/node-state-changed-actions/
+cp active-standby-services/active-standby-monitor.sh %{buildroot}/opt/monitoring/
+
+%files
+/opt/monitoring/*
+/etc/monitoring/quorum-state-changed-actions
+/etc/monitoring/node-state-changed-actions
+/etc/monitoring/node-state-changed-actions/*
+/etc/monitoring/active-standby-services/*
+
+%pre
+
+%post
+# Informational message only, printed after the package has been installed.
+echo "monitoring successfully installed"
+
+
+%preun
+
+%postun
+
+%clean
+rm -rf %{buildroot}
diff --git a/src/become-redis-master.sh b/src/become-redis-master.sh
new file mode 100755 (executable)
index 0000000..342137c
--- /dev/null
@@ -0,0 +1,31 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Becoming master..."
+
+filename=/etc/redis.conf
+passwd=$(egrep -e "^requirepass" $filename | awk '{print $2}')
+
+for ((i=0; i<5; i++)); do
+    redis-cli -a $passwd slaveof no one
+    if [ $? -eq 0 ]; then
+        break
+    fi
+
+    sleep 1
+done
+
+exit 0
diff --git a/src/become-redis-slave.sh b/src/become-redis-slave.sh
new file mode 100755 (executable)
index 0000000..b1e2f7e
--- /dev/null
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Becoming slave of $1"
+
+filename=/etc/redis.conf
+passwd=$(egrep -e "^requirepass" $filename | awk '{print $2}')
+
+redis-cli -a $passwd slaveof $1 6379
+
+exit 0
diff --git a/src/dbwatchdog-node-shutdown-handler.sh b/src/dbwatchdog-node-shutdown-handler.sh
new file mode 100755 (executable)
index 0000000..1c78cfe
--- /dev/null
@@ -0,0 +1,29 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Running $0"
+
+for ((i=0; i<50; i++)); do
+    echo "Waiting for mariadb to become inactive"
+    state=$(/usr/bin/systemctl is-active mariadb)
+    if [ "x$state" == "xinactive" -o "x$state" == "xfailed" ]; then
+        echo "mariadb is completely stopped"
+        break
+    fi
+    sleep 1
+done
+
+exit 0
diff --git a/src/dbwatchdog.sh b/src/dbwatchdog.sh
new file mode 100755 (executable)
index 0000000..d34dbc1
--- /dev/null
@@ -0,0 +1,550 @@
+#!/bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+DBAGENT_LOG=/var/log/dbwatchdog.log
+DOMAIN=galera
+OWNNODE=$(hostname)
+DSSCLI=/usr/local/bin/dsscli
+BECOMEMASTERATTR=become-master
+LOCKNAME=galera
+LOCKTIMEOUT=60
+LOCKCLI=/usr/local/bin/lockcli
+LOCKNAME=galera
+LOCKHOLDER=$OWNNODE
+LOCKUUID=0
+LOCKUUID_FILE=/var/run/.$DOMAIN.lock.uuid
+
+declare -a dbnodes
+dbnodes_count=1
+
+function get_db_nodes() 
+{
+    IFS=',' read -a dbnodes <<< $1
+    dbnodes_count=${#dbnodes[@]}
+}
+
+
+
+function log()
+{
+    local priority=$1
+    shift
+    local message=$1
+
+    logger $priority "${FUNCNAME[2]} ${message}"
+    echo "$(date) ($priority) ${FUNCNAME[2]} ${message}" >> $DBAGENT_LOG
+}
+
+# Logs a message with the "info" priority label via log().
+# All arguments are forwarded, but log() only uses the first one as the
+# message text.
+function log_info()
+{
+    log info "$@"
+}
+
+# Logs a message with the "error" priority label via log().
+# All arguments are forwarded, but log() only uses the first one as the
+# message text.
+function log_error()
+{
+    log error "$@"
+}
+
+function run_cmd()
+{
+    local result
+    local ret
+    log_info "Running $*"
+    result=$(eval "$*" 2>&1)
+    ret=$?
+    if [ $ret -ne 0 ]; then
+           log_error "Failed with error $result"
+       else
+           log_info "Command succeeded: $result"
+       fi
+    echo "$result"
+    return $ret
+}
+
+function is_db_instance_running()
+{
+    output=$(/usr/bin/mysql -h $node -e "select 1" 2>&1)
+    if [ $? -eq 0 ]; then
+        log_info "DB instance in $node is up"
+        return 1
+    fi
+
+    echo $output | grep "Access denied"
+    if [ $? -eq 0 ]; then
+        log_info "DB instance in $node is up"
+        return 1
+    fi
+
+    return 0
+}
+
+function is_single_node()
+{
+    log_info "Checking if we are running in single-node environment"
+    if [ $dbnodes_count -gt 1 ]; then
+        return 0
+    fi
+
+    return 1
+}
+
+function lock()
+{
+    log_info "Acquiring lock"
+    while [ 1 ]; do
+        output=$($LOCKCLI lock --id $LOCKNAME --timeout $LOCKTIMEOUT)
+        if [ $? -eq 0 ]; then
+            LOCKUUID=$(echo $output | grep "uuid=" | /bin/awk -F= '{print $2}')
+            break
+        fi
+        log_info "Cannot acquire lock, waiting..."
+        sleep 5
+    done
+}
+
+function unlock()
+{
+    log_info "Releasing lock"
+    uuid=$(cat $LOCKUUID_FILE)
+    run_cmd "$LOCKCLI unlock --id $LOCKNAME --uuid $uuid"
+    return 0
+}
+
+function set_becoming_master()
+{
+    log_info "Setting becoming master"
+    run_cmd "$DSSCLI set --domain $DOMAIN --name $BECOMEMASTERATTR --value $OWNNODE"
+
+    ret=$?
+
+    if [ $ret -eq 0 ]; then
+        while [ 1 ]; do
+            log_info "Waiting for become master to be set"
+            is_becoming_master_set
+            if [ $? -eq 1 ]; then
+                break
+            fi
+            sleep 1
+        done
+    fi
+
+    return $ret
+}
+
+function is_becoming_master_set()
+{
+    log_info "Checking if becoming master is set"
+    value=$(run_cmd "$DSSCLI get --domain $DOMAIN --name $BECOMEMASTERATTR")
+    if [ $? -ne 0 ]; then
+        value=none
+    fi
+    if [ "z$value" != "znone" ]; then
+        return 1
+    fi
+    return 0
+}
+
+function get_becoming_master_node()
+{
+    log_info "Getting the node trying to become master"
+    value=$(run_cmd "$DSSCLI get --domain $DOMAIN --name $BECOMEMASTERATTR")
+    ret=$?
+    if [ $ret -ne 0 ]; then
+        value=none
+    fi
+    echo $value
+    return $ret
+}
+
+# Resets the become-master attribute to "none".
+# Returns the exit status of run_cmd (i.e. of the dsscli call).
+function unset_becoming_master()
+{
+    log_info "Unsetting becoming master"
+    run_cmd "$DSSCLI set --domain $DOMAIN --name $BECOMEMASTERATTR --value none"
+}
+
+
+# Prepares this node for starting a new cluster: flips safe_to_bootstrap
+# from 0 to 1 in grastate.dat and sets the _WSREP_NEW_CLUSTER variable in
+# the systemd manager environment to '--wsrep-new-cluster'.
+# Returns the exit status of the second run_cmd call.
+function set_wsrep_new_cluster()
+{
+    log_info "Setting new cluster and safe to bootstrap"
+    run_cmd "sed -i 's/^safe_to_bootstrap: 0/safe_to_bootstrap: 1/g' /var/lib/mysql/grastate.dat"
+    run_cmd "systemctl set-environment _WSREP_NEW_CLUSTER='--wsrep-new-cluster'"
+}
+
+# Reverts set_wsrep_new_cluster: flips safe_to_bootstrap from 1 back to 0
+# in grastate.dat and sets _WSREP_NEW_CLUSTER to an empty value in the
+# systemd manager environment.
+# Returns the exit status of the second run_cmd call.
+function unset_wsrep_new_cluster()
+{
+    log_info "Clearing new cluster flag and safe to bootstrap"
+    run_cmd "sed -i 's/^safe_to_bootstrap: 1/safe_to_bootstrap: 0/g' /var/lib/mysql/grastate.dat"
+    run_cmd "systemctl set-environment _WSREP_NEW_CLUSTER=''"
+}
+
+### own attributes
+# Sets this node's "<node>.running" attribute to true through dsscli.
+# Returns the exit status of run_cmd.
+function set_running()
+{
+    log_info "Setting running flag to true"
+    run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.running --value true"
+}
+
+# Sets this node's "<node>.running" attribute to false through dsscli.
+# Returns the exit status of run_cmd.
+function unset_running()
+{
+    log_info "Setting running flag to false"
+    run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.running --value false"
+    
+}
+
+function write_state()
+{
+    uuid=$(grep uuid /var/lib/mysql/grastate.dat  | awk '{print $2}')
+    seqno=$(grep seqno /var/lib/mysql/grastate.dat  | awk '{print $2}')
+    run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.uuid --value $uuid"
+    run_cmd "$DSSCLI set --domain $DOMAIN --name ${OWNNODE}.seqno --value $seqno"
+}
+
+### query functions
+function get_node_uuid()
+{   
+    node=$1
+    log_info "Getting uuid of node $node"
+    uuid=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.uuid")
+    ret=$?
+    if [ $ret -ne 0 ]; then
+        uuid=0
+    fi
+    echo $uuid
+    return $ret
+}
+
+function get_node_seqno()
+{   
+    node=$1
+    log_info "Getting seqno of node $node"
+    seqno=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.seqno")
+    ret=$?
+    if [ $ret -ne 0 ]; then
+        seqno=-1
+    fi
+    echo $seqno
+    return $ret
+}
+
+function do_others_have_good_seqno()
+{   
+    node=$1
+    log_info "Checking if any node have a valid seqno"
+    for no in $($DSSCLI get-domain --domain $DOMAIN | grep seqno | awk '{print $3}'); do
+        if [ $no -gt 0 ]; then
+            log_info "Some node have a valid seqno"
+            return 1
+        fi
+    done
+    log_info "No node with valid seqno found"
+    return 0
+}
+
+function get_node_running()
+{
+    node=$1
+    log_info "Getting if $node is running"
+    running=$(run_cmd "$DSSCLI get --domain $DOMAIN --name ${node}.running")
+    if [ $? -ne 0 ]; then
+        log_info "command failed with error $running"
+        running='false'
+    fi
+    log_info "Total running $running"
+    if [ "z$running" == "ztrue" ]; then
+        return 1
+    fi
+    return 0
+}
+
+
+function is_any_db_instance_running()
+{
+    log_info "Getting nodes in which the db is running"
+    total_initializing=0
+    for node in "${dbnodes[@]}"; do
+        if [ "x$node" == "x$OWNNODE" ]; then
+            continue
+        fi
+
+        is_db_instance_running $node
+        if [ $? -eq 1 ]; then
+            log_info "DB instance in $node is up"
+            return 1
+        fi
+    done
+
+    return 0
+}
+function is_cluster_running()
+{
+    log_info "Checking if an existing galera cluster is running"
+
+    #check if any instance of the db is up and running
+    is_any_db_instance_running
+    if [ $? -eq 1 ]; then
+        return 1
+    fi
+
+    return 0
+}
+
+function wait_cluster_running()
+{
+    log_info "Waiting for cluster to become running"
+    while [ 1 ]; do
+        lock
+        is_cluster_running
+        cluster_running=$?
+        if [ $cluster_running -eq 1 ]; then
+            log_info "cluster is running"
+            unlock
+            return 0
+        fi
+        unlock
+        sleep 5
+    done
+}
+# Handler for the "start-pre" command: decides whether this node joins an
+# existing cluster, waits for one, or bootstraps a new one.
+# Always returns 0. In a multi-node setup every return path leaves the
+# lock held; start_post and stop_post are the functions that call unlock.
+# NOTE(review): presumably run before the DB service is started - confirm
+# against the unit file that invokes it.
+function start_pre()
+{
+    log_info "start_pre called"
+    #check for single node case
+    is_single_node
+    single_node=$?
+    if [ $single_node -eq 1 ]; then
+        echo "Doing nothing as we are running in a single-node environment"
+        return 0
+    fi
+    #acquire lock
+    lock
+    is_cluster_running
+    cluster_running=$?
+    if [ $cluster_running -eq 1 ]; then
+        log_info "starting normally as a galera cluster is already running"
+        return 0
+    fi
+
+    #check if we have good seqno, if not then we need to wait for the active
+    #as we cannot become master
+    log_info "checking own sequence number"
+    # get_node_seqno prints -1 when the lookup fails
+    seqno=$(get_node_seqno $OWNNODE)
+    if [ $seqno -le 0 ]; then
+        #check the seqno of others
+        do_others_have_good_seqno
+        if [ $? -eq 1 ]; then
+            log_info "bad seqno $seqno we need to wait for cluster to become running"
+            # release the lock while waiting, then take it again before returning
+            unlock
+            wait_cluster_running
+            lock
+            return 0
+        fi
+    fi
+
+    if [ $seqno -le 0 ]; then
+        log_info "no one seems to have a good seqno"
+    else
+        log_info "no running galera cluster found and we have good seqno"
+    fi
+
+    log_info "check if someone is trying to become master"
+    is_becoming_master_set
+    becoming_master=$?
+    if [ $becoming_master -eq 1 ]; then
+        log_info "someone is trying to become master, backing off"
+        # release the lock while waiting, then take it again before returning
+        unlock
+        wait_cluster_running
+        lock
+        return 0
+    fi
+
+    log_info "no one is trying to become master, let us become master"
+    # register as the becoming master and arrange for a new-cluster start
+    set_becoming_master
+    set_wsrep_new_cluster
+    return 0
+}
+
+# Handler for the "start-post" command. In a multi-node setup it clears
+# the become-master attribute if this node owns it and the nodes are in
+# quorum, marks this node as running, clears the new-cluster settings and
+# releases the lock. Always returns 0.
+function start_post()
+{
+    log_info "start_post setting running state to true"
+    #check for single node case
+    is_single_node
+    single_node=$?
+    if [ $single_node -eq 1 ]; then
+        echo "Doing nothing as we are running in a single-node environment"
+        return 0
+    fi
+    # is_in_quorum returns 1 when in quorum
+    is_in_quorum
+    qm=$?
+    if [ $qm -eq 1 ]; then
+        # only the node that registered itself may clear the attribute
+        become_master_node=$(get_becoming_master_node)
+        if [ "x$become_master_node" == "x$OWNNODE" ]; then
+            unset_becoming_master
+        fi
+    fi
+
+    set_running
+    unset_wsrep_new_cluster
+    unlock
+
+    return 0
+}
+
+# Handler for the "stop-post" command. In a multi-node setup it clears the
+# become-master attribute if this node owns it, clears the new-cluster
+# settings and, when in quorum, publishes uuid/seqno, marks this node as
+# not running and waits (at most 10 checks, 2 seconds apart) for that
+# state to read back. Finally it releases the lock. Always returns 0.
+function stop_post()
+{
+    log_info "stop_post setting running state to false"
+    #check for single node case
+    is_single_node
+    single_node=$?
+    if [ $single_node -eq 1 ]; then
+        echo "Doing nothing as we are running in a single-node environment"
+        return 0
+    fi
+    # is_in_quorum returns 1 when in quorum
+    is_in_quorum
+    qm=$?
+    if [ $qm -eq 1 ]; then
+        # only the node that registered itself may clear the attribute
+        become_master_node=$(get_becoming_master_node)
+        if [ "x$become_master_node" == "x$OWNNODE" ]; then
+            unset_becoming_master
+        fi
+    fi
+    
+    unset_wsrep_new_cluster
+    if [ $qm -eq 1 ]; then
+        write_state
+        unset_running
+        for ((i=0; i<10; i++)); do
+            log_info "Waiting for own state to become not running"
+            # get_node_running returns 0 once the flag is no longer "true"
+            get_node_running $OWNNODE
+            if [ $? -eq 0 ]; then
+                log_info "Own state is updated"
+                break
+            fi
+            sleep 2
+        done
+    fi
+    unlock
+    return 0
+}
+
+function stop()
+{
+    log_info "waiting until clustercheck is ok"
+    is_single_node
+    single_node=$?
+    if [ $single_node -eq 1 ]; then
+        log_info "Doing nothing as we are running in a single-node environment"
+        return 0
+    fi
+
+    while true; do
+        /usr/local/bin/clustercheck
+        if [ $? -eq 0 ]; then
+            log_info "clustercheck is ok"
+            break
+        fi
+        sleep 2
+    done
+}
+
+function get_states()
+{
+    log_info "Getting states"
+    run_cmd "$DSSCLI get-domain --domain $DOMAIN"
+    run_cmd "$DSSCLI get-domain --domain _locks"
+    is_in_quorum
+    if [ $? -eq 1 ]; then
+        echo "Nodes have quorum"
+    else
+        echo "Nodes don't have quorum"
+    fi
+}
+
+function is_in_quorum()
+{
+    log_info "Checking if peer nodes are running"
+    nodes=$($DSSCLI get-domain --domain galera | grep running | awk -F. '{print $1}')
+    if [ $? -ne 0 ]; then
+        return 0
+    fi
+
+    count=0
+    down=0
+    up=0
+    for node in "${dbnodes[@]}"; do
+        let count=$count+1
+        is_db_instance_running $node
+        if [ $? -eq 1 ]; then
+            let up=$up+1
+        else
+            let down=$down+1
+        fi
+    done
+
+    log_info "Total $count, up $up, down $down"
+
+    if [ $count -eq 1 ]; then
+        return 1
+    fi
+
+    if [ $up -gt $down ]; then
+        return 1
+    fi
+
+    return 0
+}
+
+
+function kill_old()
+{
+    log_info "Checking for hanging mysqld services"
+    mysqlpid=$(/usr/sbin/pidof mysqld)
+    if [ "x$mysqlpid" == "x" ]; then
+        return
+    fi
+    kill -9 $mysqlpid
+}
+
+if [ $# -ne 2 ]; then
+    echo "Usage:$0 start-pre|start-post|stop|stop-post|get-states|set-running|kill-old|do-others-have-good-seqno <comma separted list of db node names>"
+    exit 1
+fi
+
+get_db_nodes $2
+
+if [ $1 == "start-pre" ]; then
+    start_pre
+elif [ $1 == "start-post" ]; then
+    start_post
+elif [ $1 == "stop" ]; then
+    stop
+elif [ $1 == "stop-post" ]; then
+    stop_post
+elif [ $1 == "get-states" ]; then
+    get_states
+elif [ $1 == "set-running" ]; then
+    set_running
+elif [ $1 == "kill-old" ]; then
+    kill_old
+elif [ $1 == "do-others-have-good-seqno" ]; then
+    do_others_have_good_seqno
+elif [ $1 == "is-any-db-instance-running" ]; then
+    is_any_db_instance_running
+    result=$?
+    echo "Result is $result"
+else
+    echo "Invalid option provided"
+    echo "Usage:$0 start-pre|start-post|stop|stop-post|get-states|set-running|kill-old|do-others-have-good-seqno|is-any-db-instance-running"
+    exit 1
+fi
diff --git a/src/keepalivedmonitor.py b/src/keepalivedmonitor.py
new file mode 100755 (executable)
index 0000000..92e7ad4
--- /dev/null
@@ -0,0 +1,58 @@
+#! /usr/bin/python
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import socket
+import select
+import os
+import errno
+import sys
+
+if __name__ == '__main__':
+    host = socket.gethostname()
+    ip = socket.gethostbyname(host)
+    port = int(sys.argv[1])
+
+    print("Starting listening to port %d" % port)
+
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    s.setblocking(0)
+    s.bind((ip, port))
+    s.listen(1)
+    inputs = [s]
+    while True:
+        try:
+            readable, _, _ = select.select(inputs, [], [])
+            for f in readable:
+                if f is s:
+                    client, address = s.accept()
+                    client.setblocking(0)
+                    inputs.append(client)
+                    #print("Accepted connection from %r, total inputs %d" % (address, len(inputs)))
+                else:
+                    try:
+                        result = f.recv()
+                        if not result:
+                            inputs.remove(f)
+                    except Exception as exp:
+                        inputs.remove(f)
+        except (SystemExit, KeyboardInterrupt):
+            break
+        except select.error as ex:
+            if ex.args[0] == errno.EINTR:
+                break
+
+    print("Stopping...")
diff --git a/src/monitoring_can_become_master.sh b/src/monitoring_can_become_master.sh
new file mode 100755 (executable)
index 0000000..8cfdb4f
--- /dev/null
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+STOP_FILE=/run/.monitoring_do_not_become_master
+if [ -f $STOP_FILE ]; then
+    log info "$0 giveup the master role"
+    exit 1
+fi
+
+exit 0
diff --git a/src/monitoring_common.sh b/src/monitoring_common.sh
new file mode 100755 (executable)
index 0000000..91c6637
--- /dev/null
@@ -0,0 +1,42 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+KEEPALIVED_LOG=/var/log/monitoring.log
+ID=monitoring
+
+QUORUM_ACTIONS_DIR=/etc/monitoring/quorum-state-changed-actions
+NODE_STATE_ACTIONS_DIR=/etc/monitoring/node-state-changed-actions
+
+function log()
+{
+    local priority=$1
+    shift
+    local message=$1
+
+    logger $priority "${FUNCNAME[2]} ${message}"
+    echo "$(date) ($priority) $ID ${FUNCNAME[2]} ${message}" >> $KEEPALIVED_LOG
+}
+
+function execute_actions()
+{
+    DIR=$1
+    shift
+    for file in $(ls $DIR/*.sh); do
+        log info "Running $file"
+        bash $file $*
+        log info "Result $?"
+    done
+}
diff --git a/src/monitoring_quorum_down.sh b/src/monitoring_quorum_down.sh
new file mode 100755 (executable)
index 0000000..6a7f001
--- /dev/null
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+log info "$0 called"
+
+execute_actions $QUORUM_ACTIONS_DIR DOWN
+
+exit 0
+
diff --git a/src/monitoring_quorum_up.sh b/src/monitoring_quorum_up.sh
new file mode 100755 (executable)
index 0000000..6b70c2a
--- /dev/null
@@ -0,0 +1,24 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+log info "$0 called"
+
+execute_actions $QUORUM_ACTIONS_DIR UP
+
+exit 0
+
diff --git a/src/monitoring_state_changed.sh b/src/monitoring_state_changed.sh
new file mode 100755 (executable)
index 0000000..232b5aa
--- /dev/null
@@ -0,0 +1,27 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source monitoring_common.sh
+
+TYPE=$1
+NAME=$2
+STATE=$3
+
+log info "$0 called with $*"
+
+execute_actions $NODE_STATE_ACTIONS_DIR $STATE
+
+exit 0
diff --git a/src/recover-db-files.sh b/src/recover-db-files.sh
new file mode 100755 (executable)
index 0000000..856d76f
--- /dev/null
@@ -0,0 +1,108 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ $# -ne 2 ]; then
+       echo "Usage:$0 <node> <backup-dir>"
+       exit 1
+fi
+
+node_arg=$1
+backup_dir=$2
+node=$(hostname)
+
+if [ $node_arg -ne $node ]; then
+    echo "You need to run the script from the same node where the corrupted db is"
+    exit 1
+fi
+
+echo "Creating backup directory $backup_dir"
+
+mkdir -p $backup_dir
+
+if [ $? -ne 0 ]; then
+       echo "Failed to create $backup_dir"
+       exit 1
+fi
+
+echo "Locking db service"
+/opt/nokia/bin/hascli -l -o /$node/mariadb/mariadb
+if [ $? -ne 0 ]; then
+    echo "Failed to lock /$node/mariadb/mariadb"
+    exit 1
+fi
+
+echo "Copying existing db files"
+cp -r /var/lib/mysql $backup_dir
+
+echo "Removing old db files"
+rm -rf /var/lib/mysql
+
+echo "Recreating db directory"
+mkdir /var/lib/mysql
+chown mysql:mysql /var/lib/mysql
+chmod 2755 /var/lib/mysql
+
+echo "Installing the db"
+/usr/bin/mysql_install_db --datadir=/var/lib/mysql --user=mysql
+if [ $? -ne 0 ]; then
+    echo "db installation failed"
+    exit 1
+fi
+chown -R mysql:mysql /var/lib/mysql/
+/usr/sbin/restorecon -R /var/lib/mysql
+
+echo "Starting db in safe mode"
+/usr/bin/mysqld_safe --wsrep-provider=none &
+if [ $? -ne 0 ]; then
+    echo "Failed to start db in safe mode"
+    exit 1
+fi
+
+echo "Waiting for db to become up"
+while [ 1 ]; do
+    /bin/mysqladmin -h localhost -u root --password= ping | grep "mysqld is alive"
+    if [ $? -eq 0 ]; then
+        echo "DB is now up"
+        break
+    fi
+    echo "DB is not yet up, waiting..."
+    sleep 2
+done
+
+echo "Fix the passwords/grants"
+root_password=$(sudo grep password /root/.my.cnf | cut -d'=' -f2)
+echo "grant all on *.* to root@localhost identified by \"$root_password\";" >/tmp/restore.sql
+echo "set password for 'root'@'localhost' = password(\"$root_password\");" >>/tmp/restore.sql
+rc=0
+mysql -h localhost -u root --password= < /tmp/restore.sql
+if [ $? -ne 0 ]; then
+    echo "Failed to fix grants"
+    rc=1
+fi
+
+echo "Shutting down the db"
+/usr/bin/mysqladmin -h localhost -u root shutdown
+if [ $? -ne 0 ]; then
+    echo "Failed to shutdown the db"
+    rc=1
+fi
+
+if [ $rc -eq 0 ]; then
+    echo "DB files recovered successfully, starting db"
+    /opt/nokia/bin/hascli -u -o /$node/mariadb/mariadb
+fi
+
+exit $rc
diff --git a/src/rediscontroller.sh b/src/rediscontroller.sh
new file mode 100755 (executable)
index 0000000..34a4950
--- /dev/null
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "redis controller started"
+
+while [ 1 ]; do
+    sleep 600
+done
diff --git a/src/redismonitor.sh b/src/redismonitor.sh
new file mode 100755 (executable)
index 0000000..bef9b49
--- /dev/null
@@ -0,0 +1,38 @@
+#! /bin/bash
+
+# Copyright 2019 Nokia
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "redis monitor started"
+
+filename=/etc/redis.conf
+passwd=$(egrep -e "^requirepass" $filename | awk '{print $2}')
+
+while [ 1 ]; do
+    redis-cli -a $passwd info | grep "role:master" 2> /dev/null 1>&2
+    master=$?
+
+    systemctl status rediscontroller 2> /dev/null 1>&2
+    active=$?
+
+    if [ $active -eq 0 -a $master -ne 0 ]; then
+        echo "Changing redis db to master"
+        /opt/monitoring/become-redis-master.sh
+    elif [ $active -ne 0 -a $master -eq 0 ]; then
+        echo "Changing redis db to slave"
+        /opt/monitoring/become-redis-slave.sh $1
+    fi
+
+    sleep 10
+done