#!/bin/sh
#
# ha: Sun Enterprise Server check - BB external script test
#
##### Purpose is to report back to a central server, all Veritas
##### FirstWatch or Cluster Server faults.
#####
#
# version 1.0 - only works with Veritas FirstWatch
# version 2.0 - updated for Veritas Cluster Server
# version 2.1 - properly uses $CAT instead of cat
# version 2.2 - properly uses $MACHINE instead of `uname -n` in bb-hosts check
# version 2.3 - properly uses $THIS_HOST instead of $MACHINE due to fqdn using
#               comma in name - thanks Craig Cook
# version 2.4 - changed /tmp to $BBTMP
#               ha to $TEST
#               $THIS_HOST to $MACHINE
#               moved comment explaining purpose of script to start of code
#               moved program hardcoding into bbsys.local (ie $FWBIN)
#               fixed output of the compare between hastatus and hasys
#               beautified the output with TABs a bit
#               cleaned up a lot of extraneous calls to cat and grep and made
#               awk do all the work
# version 3.0 - pretty much rewrote all but the logic of the tests
#               Warns if default values are not used
#               Applied changes by Todd Jimenez
#               - created get_header and get_footer functions
#               - set summary value
#               - added &red to red alerts
#               - added &yellow to yellow alerts
#               restored functionality to check for the existence of "ha"
#               after the server name in bb-hosts, but now optional
#               changed &red and &yellow to variables
# version 3.1 - added tests for D, G and H lines of hastatus
# version 3.2 - added tests for E lines of hastatus
#
# WARNING: the comments below are just a guide, seek advice from your
#          security team before following these instructions
# I had problems where "bbuser" could not run HAevent...
# to make this work I copied the HAevent binary into
# $BBHOME/ext/bin
# chown root:bb HAevent
# chmod u+s HAevent
# chmod g+x HAevent (allow big brother to execute it as root)
# chmod o-rwx HAevent (don't allow world access)
# Do the same for hastatus
# Do the same for hasys
# change permissions on the dir so only big brother can
# get into it (chmod 750 $BBHOME/ext/bin)
# update bbsys.local
# Another method is to use "sudo" - one catch though, your sudo
# logs will fill up quickly
#
# BIG BROTHER / XXXXXXXXXXXXXXXX status
#
# Written by Paul A. Luzzi
# on March 22, 2000
#
# 2.4 Updates by Mike Arnold
# on November 14, 2001
#
# 3.0 Updates by Mike Arnold
# on November 21, 2001
#
# 3.1 Updates by Mike Arnold
# on November 27, 2001
#
# 3.2 Updates by Mike Arnold
# on November 28, 2001
#
########################################
# NOTE
# This has been tested with BB 1.8c
#
# Tested on :
# Sun E4500
########################################

########################################
# INSTALLATION
# step 1 - update bb-bbexttab to include this ha
#          (older BB versions update EXT section of the bbdef.sh script)
#
# step 2 - copy lines mentioned to bbsys.local (without the #'s)
#
# step 3 - if you are using an older version of BB without bb-bbexttab
#          and you don't want this run on every client uncomment
#          CHECK_BB_HOSTS="Y" and add the name of this $TEST to
#          bb-hosts for this client. eg.
#          myserver1.domain.com # ha
#
# step 4 - restart Big Brother
#
# NOTE - the TEST variable in the configuration section, this is the name used
#        as the column header.
########################################

##################################
# CONFIGURE IT HERE
##################################

# Name of this test; it is used in the status message and in the names
# of the temporary files.
TEST="ha"
BBPROG="$0"; export BBPROG

#
# Start of lines to put in bbsys.local
#
# PKGINFO=/usr/bin/pkginfo
# FWBIN=/opt/VRTSfw/bin
# VCSBIN=/opt/VRTSvcs/bin
# HAEVENT=${FWBIN}/HAevent
# HASTATUS=${VCSBIN}/hastatus
# HASYS=${VCSBIN}/hasys
# LLTSTAT=/sbin/lltstat
# export FWBIN VCSBIN HAEVENT HASTATUS HASYS LLTSTAT PKGINFO
#
# End of lines to put in bbsys.local
#

# define colours for graphics
# Comment these out if using older BB versions
RED_PIC="&red"
YELLOW_PIC="&yellow"
GREEN_PIC="&green"

# don't scan through bb-hosts every time
# this is here for older BB versions without bb-bbexttab
# uncomment to activate
#CHECK_BB_HOSTS=Y

##################################
# Start of script
##################################

# NOTE: this script is written for the plain Bourne shell (#!/bin/sh),
# so it deliberately sticks to backticks and [ ... ] and uses printf
# rather than echo wherever an escape sequence (\n, \c) is needed.

#BBHOME=/home/bb/bb; export BBHOME

if [ -z "$BBHOME" ]
then
    echo "$TEST: BBHOME is not set" >&2
    exit 1
fi

if [ ! -d "$BBHOME" ]
then
    echo "$TEST: BBHOME is invalid" >&2
    exit 1
fi

if [ -z "$BBTMP" ]                      # GET DEFINITIONS IF NEEDED
then
    # echo "*** LOADING BBDEF ***"
    . "$BBHOME/etc/bbdef.sh"            # INCLUDE STANDARD DEFINITIONS
fi

#####
##### get_header TITLE COMMAND
#####   Prints the heading of one report section: an empty line, the
#####   title, the command that produced the data in brackets, and
#####   another empty line.
#####
get_header()
{
    echo ""
    printf '%s\n' "$1"
    printf '(%s)\n' "$2"
    echo ""

    # If you want the "Paul Luzzi" look use these lines instead of the
    # title/command lines above:
    #echo "============== $1 =============="
    #echo "--- ($2) ---"
}

#####
##### get_footer
#####   Prints the two empty lines that close a report section.
#####
get_footer()
{
    echo ""
    echo ""
}

#####
##### ha_warn_default NAME VALUE
#####   Reports (with a yellow dot) that NAME was not set in
#####   etc/bbsys.local and that VALUE is being used instead.
#####
ha_warn_default()
{
    echo ""
    echo "$YELLOW_PIC $1 command is not defined in etc/bbsys.local - using default: $2"
}

#####
##### ha_raise_yellow
#####   Raises the overall COLOR to yellow unless it is already red.
#####
ha_raise_yellow()
{
    if [ "$COLOR" != "red" ]; then
        COLOR="yellow"
    fi
}

#####
##### ha_report_lines SEVERITY TITLE HEADER DATA
#####   Prints one "list of offending lines" section body.
#####   SEVERITY is "red" or "yellow" and is applied to COLOR when DATA
#####   is not empty; an empty DATA prints the green "None" message.
#####
ha_report_lines()
{
    if [ -n "$4" ]; then
        if [ "$1" = "red" ]; then
            COLOR="red"
            echo "$RED_PIC $2 : "
        else
            ha_raise_yellow
            echo "$YELLOW_PIC $2 : "
        fi
        echo "$3"
        echo "$4"
    else
        echo "$GREEN_PIC $2 : "
        echo " None - All OK!"
    fi
}

#####
##### ha_check_firstwatch
#####   Veritas FirstWatch tests.  Writes the report to stdout and sets
#####   COLOR to red when a test fails.
#####
ha_check_firstwatch()
{
    # Output the results to a file so as not to run the commands
    # multiple times
    $HAEVENT status         > "$HA_BASE.status"
    $HAEVENT -remote status > "$HA_BASE.rstatus"
    $HAEVENT hstatus        > "$HA_BASE.hstatus"

    #####
    ##### Veritas First Watch - local server
    #####
    get_header "First Watch Server Status Info" "$HAEVENT status"
    CATCH_COMMAND1=`$EGREP "ONLINE_PRIMARY|TAKEOVER_READY" "$HA_BASE.status"`
    if [ "$?" -eq 0 ]; then
        printf 'Status of the Primary Service : %s\n' "$CATCH_COMMAND1"
    else
        COLOR="red"
        echo "$RED_PIC Problem with First Watch Primary !!!"
        # egrep matched nothing here, so show what the command really
        # printed instead of the (empty) match.
        echo "Actual output was : "
        $CAT "$HA_BASE.status"
    fi
    get_footer

    #####
    ##### Veritas First Watch - remote server
    #####
    get_header "First Watch Remote Server Status Info" "$HAEVENT -remote status"
    CATCH_COMMAND2=`$EGREP "ONLINE_PRIMARY|TAKEOVER_READY" "$HA_BASE.rstatus"`
    if [ "$?" -eq 0 ]; then
        printf 'Status of the Remote Service : %s\n' "$CATCH_COMMAND2"
    else
        COLOR="red"
        echo "$RED_PIC Problem with First Watch Remote !!!"
        echo "Actual output was : "
        $CAT "$HA_BASE.rstatus"
    fi
    get_footer

    #####
    ##### Veritas First Watch Heartbeat Info
    #####
    get_header "First Watch Heartbeat Status Info" "$HAEVENT hstatus"
    # The pipeline succeeds only when a "Heartbeat" line WITHOUT "Yes"
    # exists, i.e. success here means there is a problem.
    if $GREP Heartbeat "$HA_BASE.hstatus" | $GREP -v Yes >/dev/null 2>&1; then
        COLOR="red"
        echo "$RED_PIC Problem with First Watch Heartbeat !!!"
        echo "Actual output was : "
    else
        echo " Status of the Primary Heartbeat : "
    fi
    $CAT "$HA_BASE.hstatus"
    get_footer

    # Clean up after ourselves
    $RM "$HA_BASE.status" "$HA_BASE.rstatus" "$HA_BASE.hstatus"
}

#####
##### ha_check_vcs
#####   Veritas Cluster Server tests.  Writes the report to stdout and
#####   raises COLOR to yellow/red as problems are found.
#####   Returns 1 (after setting COLOR to red) when hastatus reports
#####   "Local system not available", 0 otherwise.
#####
ha_check_vcs()
{
    # Output the results to a file so as not to run the commands
    # multiple times
    $HASTATUS -summary > "$HA_BASE.status"
    $HASYS -list       > "$HA_BASE.sys"
    $LLTSTAT -l        > "$HA_BASE.lltstat"

    HA_SUMMARY=`$CAT "$HA_BASE.status"`

    # Sanity check before we start:
    # break out of the testing at this point since VCS is not running.
    if [ "$HA_SUMMARY" = "Local system not available" ]; then
        printf '\nERROR! The output of "%s -summary" is:\n\n%s\n' \
            "$HASTATUS" "$HA_SUMMARY"
        COLOR="red"
        # Do not leave the temporary files behind on the early exit.
        $RM "$HA_BASE.status" "$HA_BASE.sys" "$HA_BASE.lltstat"
        return 1
    fi

    #####
    ##### Veritas Cluster System State
    #####
    get_header "Cluster System State" "$HASTATUS -summary | grep \"^A\""
    CATCH_COMMAND0=`$AWK '/^A/{print $2"\t\t"$3"\t\t"$4}' "$HA_BASE.status"`
    # Possible values seem to be RUNNING, EXITED, and FAULTED (or GREEN,
    # YELLOW, and RED in BBspeak).
    DOT_COLOR=""
    if echo "$CATCH_COMMAND0" | $GREP "EXITED" >/dev/null 2>&1; then
        ha_raise_yellow
        DOT_COLOR="$YELLOW_PIC"
    fi
    if echo "$CATCH_COMMAND0" | $GREP "FAULTED" >/dev/null 2>&1; then
        COLOR="red"
        DOT_COLOR="$RED_PIC"
    fi
    # The value holds several tab separated fields, so it must be
    # quoted or test fails with "too many arguments".
    if [ -n "$CATCH_COMMAND0" ]; then
        echo "${DOT_COLOR:-$GREEN_PIC} Cluster System State : "
        echo "System State Frozen"
        echo "$CATCH_COMMAND0"
    else
        COLOR="red"
        echo "$RED_PIC Problem with Cluster Services !!!"
        printf 'Output of %s -summary was : \n%s\n' "$HASTATUS" "$HA_SUMMARY"
    fi

    # If the number of running hosts does not equal the number of hosts
    # in the cluster, then turn yellow.
    # "x+0" makes awk print 0 instead of an empty string for no match.
    HA_RUNNING=`$AWK '/RUNNING/{++x} END{print x+0}' "$HA_BASE.status"`
    HA_MEMBERS=`$AWK '{++x} END{print x+0}' "$HA_BASE.sys"`
    if [ "$HA_RUNNING" != "$HA_MEMBERS" ]; then
        HA_RUNNING_NAMES=`$AWK '/RUNNING/{print $2}' "$HA_BASE.status"`
        echo "$YELLOW_PIC One or more of the members of the cluster appear down."
        echo "Only showing $HA_RUNNING of $HA_MEMBERS members up."
        echo "Running systems are : $HA_RUNNING_NAMES"
        ha_raise_yellow
    fi
    get_footer

    #####
    ##### Veritas Cluster Group State
    #####
    get_header "Cluster Group State" "$HASTATUS -summary | grep \"^B\""
    CATCH_COMMAND3=`$AWK '/^B/{print $2"\t"$3"\t\t"$6}' "$HA_BASE.status"`
    # Possible values we are looking for are PARTIAL and FAULTED (or
    # YELLOW, and RED in BBspeak).
    DOT_COLOR=""
    if echo "$CATCH_COMMAND3" | $GREP "PARTIAL" >/dev/null 2>&1; then
        # The dot of THIS section turns yellow even when an earlier
        # section already made the overall colour red.
        ha_raise_yellow
        DOT_COLOR="$YELLOW_PIC"
    fi
    if echo "$CATCH_COMMAND3" | $GREP "FAULTED" >/dev/null 2>&1; then
        COLOR="red"
        DOT_COLOR="$RED_PIC"
    fi
    if [ -n "$CATCH_COMMAND3" ]; then
        echo "${DOT_COLOR:-$GREEN_PIC} Cluster Server Group Status : "
        echo "Group Server State"
        echo "$CATCH_COMMAND3"
    else
        COLOR="red"
        echo "$RED_PIC Problem with Cluster Services !!!"
        printf 'Output of %s -summary was : \n%s\n' "$HASTATUS" "$HA_SUMMARY"
    fi
    DOT_COLOR=""
    get_footer

    #####
    ##### Veritas Cluster Resources Failed
    #####
    # NOTE(review): this pattern is " C " while every other section
    # anchors on the first column ("^D", "^E", ...) - confirm against
    # real "hastatus -summary" output.
    get_header "Cluster Resources Failed" "$HASTATUS -summary | grep \" C \""
    CATCH_COMMAND4=`$AWK '/ C /{print $2"\t"$3"\t\t"$4"\t"$5}' "$HA_BASE.status"`
    ha_report_lines red "Cluster Server Failed Resources" \
        "Group Type Resource System" "$CATCH_COMMAND4"
    get_footer

    #####
    ##### Veritas Cluster Resources Not Probed
    #####
    get_header "Cluster Resources Not Probed" "$HASTATUS -summary | grep \"^D\""
    CATCH_COMMAND5=`$AWK '/^D/{print $2"\t"$3"\t\t"$4"\t"$5}' "$HA_BASE.status"`
    ha_report_lines yellow "Cluster Server Resources Not Probed" \
        "Group Type Resource System" "$CATCH_COMMAND5"
    get_footer

    #####
    ##### Veritas Cluster Resources Onlining
    #####
    get_header "Cluster Resources Onlining" "$HASTATUS -summary | grep \"^E\""
    CATCH_COMMAND6=`$AWK '/^E/{print $2"\t"$3"\t\t"$4"\t"$5"\t"$6}' "$HA_BASE.status"`
    ha_report_lines yellow "Cluster Server Resources Onlining" \
        "Group Type Resource System IState" "$CATCH_COMMAND6"
    get_footer

    #####
    ##### Veritas Cluster Resources Offlining
    #####
    get_header "Cluster Resources Offlining" "$HASTATUS -summary | grep \"^F\""
    CATCH_COMMAND6=`$AWK '/^F/{print $2"\t"$3"\t"$4"\t"$5"\t"$6}' "$HA_BASE.status"`
    ha_report_lines red "Cluster Server Offline Resources" \
        "Group Type Resource System IState" "$CATCH_COMMAND6"
    get_footer

    #####
    ##### Veritas Cluster Groups Frozen
    #####
    get_header "Cluster Groups Frozen" "$HASTATUS -summary | grep \"^G\""
    CATCH_COMMAND7=`$AWK '/^G/{print $2}' "$HA_BASE.status"`
    ha_report_lines yellow "Cluster Server Groups Frozen" \
        "Group" "$CATCH_COMMAND7"
    get_footer

    #####
    ##### Veritas Cluster Resources Disabled
    #####
    get_header "Cluster Resources Disabled" "$HASTATUS -summary | grep \"^H\""
    # Strip the leading "H" marker and the blanks/tabs that follow it.
    CATCH_COMMAND8=`$GREP "^H" "$HA_BASE.status" | $SED 's/^H[ 	]*//'`
    ha_report_lines yellow "Cluster Server Resources Disabled" \
        "Group Type Resource" "$CATCH_COMMAND8"
    get_footer

    #####
    ##### Veritas Cluster Server Heartbeat Info
    #####
    get_header "Cluster Server Heartbeat Info" "$LLTSTAT -l | grep ether"
    CATCH_COMMAND9=`$AWK '/ether/{print $1"\t"$2"\t\t"$3"line"}' "$HA_BASE.lltstat"`
    if [ -n "$CATCH_COMMAND9" ]; then
        printf '%s Heartbeat Link Status : \n\n' "$GREEN_PIC"
        echo "Link Interface Status"
        echo "$CATCH_COMMAND9"
    else
        COLOR="red"
        echo "$RED_PIC Problem with Heartbeat !!!"
        HA_LLT_OUTPUT=`$CAT "$HA_BASE.lltstat"`
        printf 'Output of %s -l was : \n%s\n' "$LLTSTAT" "$HA_LLT_OUTPUT"
    fi

    # At least two heartbeat links are expected.  "x+0" guarantees a
    # number, so the numeric test below cannot fail on an empty value.
    HA_LINKS=`$AWK '/ether/{++x} END{print x+0}' "$HA_BASE.lltstat"`
    if [ "$HA_LINKS" -le 1 ]; then
        printf '\n\n%s Problem with heartbeat - one or more are down.\n' "$RED_PIC"
        echo "Only showing $HA_LINKS configured and should be at least 2."
        COLOR="red"
    fi
    get_footer

    # Clean up after ourselves
    $RM "$HA_BASE.status" "$HA_BASE.sys" "$HA_BASE.lltstat"
    return 0
}

#####
##### Get Status proc - used to get all responses
#####   Writes the complete report to stdout and leaves the overall
#####   result ("green", "yellow" or "red") in COLOR.
#####   Returns 1 when the VCS tests had to be abandoned, 0 otherwise.
#####
get_status()
{
    #####
    ##### Setup some variables for use later
    #####
    COLOR="green"
    DOT_COLOR=""
    CLUSTER=""                          # never trust an inherited value
    HA_BASE="$BBTMP/$MACHINE.$TEST"     # prefix of our temporary files

    # Check defaults have been set
    if [ -z "$PKGINFO" ]; then
        PKGINFO=/usr/bin/pkginfo
        ha_warn_default PKGINFO "$PKGINFO"
    fi
    if [ -z "$FWBIN" ]; then
        FWBIN=/opt/VRTSfw/bin
        ha_warn_default FWBIN "$FWBIN"
    fi
    if [ -z "$VCSBIN" ]; then
        VCSBIN=/opt/VRTSvcs/bin
        ha_warn_default VCSBIN "$VCSBIN"
    fi
    if [ -z "$HAEVENT" ]; then
        HAEVENT=$FWBIN/HAevent
        ha_warn_default HAEVENT "$HAEVENT"
    fi
    if [ -z "$HASTATUS" ]; then
        HASTATUS=$VCSBIN/hastatus
        ha_warn_default HASTATUS "$HASTATUS"
    fi
    if [ -z "$HASYS" ]; then
        HASYS=$VCSBIN/hasys
        ha_warn_default HASYS "$HASYS"
    fi
    if [ -z "$LLTSTAT" ]; then
        LLTSTAT=/sbin/lltstat
        ha_warn_default LLTSTAT "$LLTSTAT"
    fi

    #####
    ##### Find which cluster software is used
    #####
    CLUSTER_TYPE=`$PKGINFO | $GREP VRTS | $AWK '{print $2}'`
    if echo "$CLUSTER_TYPE" | $GREP fw >/dev/null 2>&1; then
        CLUSTER="VRTSfw"
    elif echo "$CLUSTER_TYPE" | $GREP vcs >/dev/null 2>&1; then
        CLUSTER="VRTSvcs"
    fi

    #####
    ##### now decide what to do based on clustering software
    #####
    case "$CLUSTER" in
    VRTSfw)
        ha_check_firstwatch
        ;;
    VRTSvcs)
        ha_check_vcs || return 1
        ;;
    *)
        #####
        ##### No Cluster Info Available
        #####
        get_header "No Cluster Server Info" "ALERT!!! ALERT!!!"
        COLOR="yellow"
        echo "No Cluster Heartbeat Configured !!!"
        echo "Is Veritas FirstWatch or Cluster Service Installed ???"
        get_footer
        ;;
    esac

    #####
    ##### Make sure to export COLOR so that it gets back to "central"
    #####
    export COLOR
    return 0
}

#####
##### ha_send_status
#####   Runs the tests, stores the report in $BBTMP/$MACHINE.$TEST and
#####   sends it with the BB command.
#####
ha_send_status()
{
    get_status > "$BBTMP/$MACHINE.$TEST"

    # NOW USE THE BB COMMAND TO SEND THE DATA ACROSS
    # The status line is kept on a line of its own so the first line of
    # the report can never be folded into it.
    HA_NOW=`$DATE`
    HA_REPORT=`$CAT "$BBTMP/$MACHINE.$TEST"`
    $BB $BBDISP "status $MACHINE.$TEST $COLOR $HA_NOW
$HA_REPORT
"
}

#####
##### Main body
#####

if [ "$CHECK_BB_HOSTS" = "Y" ]; then
    # convert "," to "." in the hostname
    MACHINE_WITH_DOTS=`echo "$MACHINE" | $SED 's/,/\./g'`
    # Run the test once, and only if bb-hosts names $TEST for this host.
    if $GREP "$MACHINE_WITH_DOTS" "$BBHOSTS" | $GREP "$TEST" >/dev/null 2>&1; then
        ha_send_status
    fi
else
    ha_send_status
fi

# Clean up our mess
# Checking for existence of the file since the whole test may be optional
# and may not actually run on every client
#
if [ -f "$BBTMP/$MACHINE.$TEST" ]; then
    $RM "$BBTMP/$MACHINE.$TEST"
fi

##############################################
# end of script
##############################################