#! /bin/csh
# check4restart 23-Oct-2007 R.Schramm
#
# Script to check deployment error dir for messages caused by oasis can reboot.
# Takes advantage of the fact that when the oasis3 controller boots it writes a
# message to the error log. The oasis processing program 'extract' parses these
# messages to the deployment download directories for each deployment
# (eg: /oasis/m1/error)
#
# Will send email to the list of users set in the variable FAILMAIL below
# Typically run as a cron job by user oasisa as part of the hourly processing
#
# Usage: check4restart m1 m2 m1test m2test
#
# SideFX: maintains a 'history' of restart messages for each mooring
# in a hidden file indicated in the variable OLD (which is set/reset for
# each mooring instance below), eg. /oasis/bin/.M1_Resets.list
#
# Annoyances: if the 'history' file is missing (such as when the script
# is run for the first time on a deployment), the script will report
# a diff failure to stderr twice and send an email indicating a possible
# reset has occurred. This may be a false alarm. Subsequent runs should
# proceed correctly unless someone deletes the history file or messes with
# the error dir for the deployment.
#
# The script can be tested by modifying any line in the history file,
# which will cause diff to trigger an email next time the script is run.
#

# ---------------------------------------------------------------------------
# Constants and per-deployment selection flags.
# Each doXXX flag is switched on when the matching deployment name is given
# on the command line; deployments that are not named are skipped.
# ---------------------------------------------------------------------------
set TRUE  = 1
set FALSE = 0

set doM1     = $FALSE
set doM2     = $FALSE
set doM1Test = $FALSE
set doM2Test = $FALSE

# csh: leave unmatched filename patterns alone instead of raising an error.
set nonomatch

# ---------------------------------------------------------------------------
# Argument parsing: accepts m1, m2, m1test, m2test (all-lowercase or
# all-uppercase). Unrecognized arguments are silently ignored.
# ---------------------------------------------------------------------------
@ i = 1
while ($i <= $#argv )
    if ( ($argv[$i] == "m1") || ($argv[$i] == "M1") ) then
        set doM1 = $TRUE
    endif
    if ( ($argv[$i] == "m2") || ($argv[$i] == "M2") ) then
        set doM2 = $TRUE
    endif
    if ( ($argv[$i] == "m1test") || ($argv[$i] == "M1TEST") ) then
        set doM1Test = $TRUE
    endif
    if ( ($argv[$i] == "m2test") || ($argv[$i] == "M2TEST") ) then
        set doM2Test = $TRUE
    endif
    @ i++
end

# ---------------------------------------------------------------------------
# Mail configuration.
#   ENABLE_FAILMAIL - set to $FALSE to suppress notification mail. NOTE: the
#                     history file is only refreshed inside the mail branch,
#                     so disabling mail also freezes the history file.
#   FAILMAIL        - comma-separated recipient list.
#   MAIL            - mail command; the subject is supplied as the argument
#                     following -s.
# ---------------------------------------------------------------------------
set ENABLE_FAILMAIL = $TRUE
set FAILMAIL = "kemi@mbari.org,coenen@mbari.org,rich@mbari.org"
set MAIL = "/bin/mailx -s "

# Every deployment section below follows the same steps:
#   1. Collect all lines containing "Restarted" from the files under $TARGET
#      into the scratch file $NEW.
#   2. diff the saved history ($OLD) against $NEW; a nonzero status is
#      treated as "a restart may have happened" (this includes the case
#      where $OLD does not exist yet).
#   3. Reduce the diff output to its "Restarted" lines, mail them to
#      $FAILMAIL, and promote $NEW to be the new history file.
# The here-document text must start in column 0 and the terminator "!" must
# be alone at the start of its line, so those lines are not indented.

########## M1 ######################
if ( $doM1 == $TRUE ) then
    set TARGET = "/oasis/m1/error"
    set OLD = "/oasis/bin/.M1_Resets.list"
    set NEW = "/oasis/bin/.tmp.list"
    set MAIL_SUBJECT = "WARNING *** OASIS M1 controller reboot detected"
    set MAIL_MSG = "A possible reboot of the OASIS M1 Controller was detected by automated processing."

    find $TARGET -type f | xargs grep Restarted > $NEW
    set DETAILS = `diff $OLD $NEW`
    set rtn = $status
    if ( $rtn ) then
        # Keep only the restart lines of the diff for the mail body.
        set DETAILS = `diff $OLD $NEW | grep Restarted`
        if ($ENABLE_FAILMAIL == $TRUE) then
            $MAIL "$MAIL_SUBJECT" $FAILMAIL << !
`date`
$MAIL_MSG
$DETAILS
!
            mv -f $NEW $OLD
        endif
    endif
endif #M1

########## M2 ######################
if ( $doM2 == $TRUE ) then
    set TARGET = "/oasis/m2/error"
    set OLD = "/oasis/bin/.M2_Resets.list"
    set NEW = "/oasis/bin/.tmp.list"
    set MAIL_SUBJECT = "WARNING *** OASIS M2 Controller reboot detected"
    set MAIL_MSG = "A possible reboot of the OASIS M2 controller was detected by automated processing."

    find $TARGET -type f | xargs grep Restarted > $NEW
    set DETAILS = `diff $OLD $NEW`
    set rtn = $status
    if ( $rtn ) then
        # Keep only the restart lines of the diff for the mail body.
        set DETAILS = `diff $OLD $NEW | grep Restarted`
        if ($ENABLE_FAILMAIL == $TRUE) then
            $MAIL "$MAIL_SUBJECT" $FAILMAIL << !
`date`
$MAIL_MSG
$DETAILS
!
            mv -f $NEW $OLD
        endif
    endif
endif #M2

########## M1 Test ######################
if ( $doM1Test == $TRUE ) then
    set TARGET = "/oasis/test/m1/error"
    set OLD = "/oasis/bin/.M1Test_Resets.list"
    set NEW = "/oasis/bin/.tmp.list"
    set MAIL_SUBJECT = "WARNING *** OASIS M1-Test Controller reboot detected"
    set MAIL_MSG = "A possible reboot of the OASIS M1-Test controller was detected by automated processing."

    find $TARGET -type f | xargs grep Restarted > $NEW
    set DETAILS = `diff $OLD $NEW`
    set rtn = $status
    if ( $rtn ) then
        # Keep only the restart lines of the diff for the mail body.
        set DETAILS = `diff $OLD $NEW | grep Restarted`
        if ($ENABLE_FAILMAIL == $TRUE) then
            $MAIL "$MAIL_SUBJECT" $FAILMAIL << !
`date`
$MAIL_MSG
$DETAILS
!
            mv -f $NEW $OLD
        endif
    endif
endif #M1Test

########## M2 Test ######################
if ( $doM2Test == $TRUE ) then
    set TARGET = "/oasis/test/m2/error"
    # Absolute paths, matching the other deployments: the script normally runs
    # from cron, so a relative "oasis/bin/..." would resolve against whatever
    # the working directory happens to be and the history would never be found.
    set OLD = "/oasis/bin/.M2Test_Resets.list"
    set NEW = "/oasis/bin/.tmp.list"
    set MAIL_SUBJECT = "WARNING *** OASIS M2-Test Controller reboot detected"
    set MAIL_MSG = "A possible reboot of the OASIS M2-Test controller was detected by automated processing."

    find $TARGET -type f | xargs grep Restarted > $NEW
    set DETAILS = `diff $OLD $NEW`
    set rtn = $status
    if ( $rtn ) then
        # Keep only the restart lines of the diff for the mail body.
        set DETAILS = `diff $OLD $NEW | grep Restarted`
        if ($ENABLE_FAILMAIL == $TRUE) then
            $MAIL "$MAIL_SUBJECT" $FAILMAIL << !
`date`
$MAIL_MSG
$DETAILS
!
            mv -f $NEW $OLD
        endif
    endif
endif #M2Test