--- projects/cms/source/server/build/etc/system.conf 2001/03/08 21:38:05 1.31 +++ projects/cms/source/server/build/etc/system.conf 2001/03/22 02:18:05 1.47 @@ -6,7 +6,7 @@ # root of configuration # # $Author: tdb $ -# $Id: system.conf,v 1.31 2001/03/08 21:38:05 tdb Exp $ +# $Id: system.conf,v 1.47 2001/03/22 02:18:05 tdb Exp $ # ############################################################ #### Configuration Files & Groups @@ -14,7 +14,15 @@ # for computing machines group.computing=Host.129.12.4.*; +group.rocks=Host.agate.ukc.ac.uk;Host.arkose.ukc.ac.uk;Host.basalt.ukc.ac.uk;Host.chalk.ukc.ac.uk;\ + Host.chert.ukc.ac.uk;Host.granite.ukc.ac.uk;Host.jade.ukc.ac.uk;Host.jasper.ukc.ac.uk;\ + Host.magnetite.ukc.ac.uk;Host.obsidian.ukc.ac.uk;Host.pumice.ukc.ac.uk;\ + Host.pyrite.ukc.ac.uk;Host.slate.ukc.ac.uk;Host.topaz.ukc.ac.uk; +group.compsoc=Host.compsoc1.ukc.ac.uk + config.computing=computing.conf +config.rocks=rocks.conf +config.compsoc1=rocks.conf # for library machines group.library=Host.129.12.58.*; @@ -37,9 +45,12 @@ ConfigurationProxy.updateTime=60 # The time interval at which the Queue's should be monitored Queue.MonitorInterval=15 +# The upper limit for a Queue +Queue.SizeLimit=1000 +# The removal algorithm +Queue.RemoveAlgorithm=FIRST - ############################################################ #### FilterManager configuration ############################################################ @@ -58,7 +69,7 @@ Filter.UDPListenPort=4589 Filter.TCPListenPort=4589 # Plugin Information -Filter.PluginsPackage=uk.ac.ukc.iscream.filter.plugins +Filter.PluginsPackage=uk.org.iscream.filter.plugins Filter.Plugins=TypeChecker;EnforceEssentialData # The name for the root filter @@ -94,7 +105,7 @@ ClientInterface.listenPort=4510 ############################################################ # The default filter for a new host -Host.filter=filter1; +Host.filter=computingFilter; # The default update times for a new host Host.UDPUpdateTime=10 @@ -102,8 +113,7 @@ 
Host.TCPUpdateTime=60 Host.AveragerUpdateTime=5 # The services checks to run on a host -Host.serviceChecksPackage=uk.ac.ukc.iscream.filter.plugins -Host.serviceChecks=HTTP;SMTP;POP3;SSH;FTP;IMAP;Telnet +Host.serviceChecksPackage=uk.org.iscream.filter.plugins @@ -111,87 +121,92 @@ Host.serviceChecks=HTTP;SMTP;POP3;SSH;FTP;IMAP;Telnet #### Monitor Configuration ############################################################ -# the monitors to run -Monitor.PluginsPackage=uk.ac.ukc.iscream.client.monitors -Monitor.Plugins=CPU;Memory;Swap;Heartbeat;WebFeeder; +## The monitors to run +Monitor.PluginsPackage=uk.org.iscream.client.monitors +Monitor.Plugins=CPU;Load;Process;Disk;Memory;Swap;Services;Heartbeat;Queue;WebFeeder; +## Default alert timeouts +# these can be overridden on a per-monitor basis +# timings of the alerts (in seconds) +Monitor.alertTimeout.NOTICE=60 +Monitor.alertTimeout.WARNING=900 +Monitor.alertTimeout.CAUTION=1800 +Monitor.alertTimeout.CRITICAL=3600 -## CPU monitor configuration -# threshold values +## Threshold values # # note: if the UPPER threshold is broken, # the alertTimeout values are halved. # This is to escalate the alert quicker. +# + +# CPU monitor threshold values Monitor.CPU.threshold.LOWER=90 Monitor.CPU.threshold.UPPER=99 -# timings of the alerts (in seconds) -Monitor.CPU.alertTimeout.NOTICE=60 -Monitor.CPU.alertTimeout.WARNING=900 -Monitor.CPU.alertTimeout.CAUTION=1800 -Monitor.CPU.alertTimeout.CRITICAL=3600 +# Load monitor threshold values +Monitor.Load.threshold.LOWER=10.0 +Monitor.Load.threshold.UPPER=20.0 -## Memory monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. 
+ +# Process Count threshold values +Monitor.Process.threshold.LOWER=1000 +Monitor.Process.threshold.UPPER=1500 + + +# Memory monitor threshold values Monitor.Memory.threshold.LOWER=80 Monitor.Memory.threshold.UPPER=90 -# timings of the alerts (in seconds) -Monitor.Memory.alertTimeout.NOTICE=60 -Monitor.Memory.alertTimeout.WARNING=900 -Monitor.Memory.alertTimeout.CAUTION=1800 -Monitor.Memory.alertTimeout.CRITICAL=3600 - -## Swap monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. +# Swap monitor threshold values Monitor.Swap.threshold.LOWER=80 Monitor.Swap.threshold.UPPER=90 -# timings of the alerts (in seconds) -Monitor.Swap.alertTimeout.NOTICE=60 -Monitor.Swap.alertTimeout.WARNING=900 -Monitor.Swap.alertTimeout.CAUTION=1800 -Monitor.Swap.alertTimeout.CRITICAL=3600 +# Disk monitor threshold values +Monitor.Disk.threshold.LOWER=90 +Monitor.Disk.threshold.UPPER=95 +# Thresholds given are PERCENTAGE or VALUE +# where PERCENTAGE is a percentage of disk in use +# and VALUE is an absolute value of space free +Monitor.Disk.thresholdMeasure=PERCENTAGE -## Heartbeat monitor configuration -# threshold values + +# Queue monitor threshold values +Monitor.Queue.threshold.LOWER=90 +Monitor.Queue.threshold.UPPER=95 +# Thresholds given are PERCENTAGE or VALUE +# where PERCENTAGE is a percentage of disk in use +# and VALUE is an absolute value of space free +Monitor.Queue.thresholdMeasure=PERCENTAGE + + +# Services monitor threshold values # -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. -Monitor.Heartbeat.checkPeriod=50 +# THE SERVICES MONITOR USES A HACK +# WHEREBY IT USES THE SERVICE STATUS +# AS THE THRESHOLD LEVEL. THIS SHOULD +# BE LOOKED AT! 
+# +# how many times we should be at the highest level before +# we go to a FINAL alert +Monitor.Services.reachFINALcount=5 + + +# Heartbeat threshold values # these are values in seconds from when the heartbeat # was *expected* to arrive (ie. last + hearbeat period) Monitor.Heartbeat.threshold.LOWER=120 -Monitor.Heartbeat.threshold.LOWER=300 +Monitor.Heartbeat.threshold.UPPER=300 +# this is how often we will run a "check" of heartbeats +Monitor.Heartbeat.checkPeriod=30 +# how many times we should be at the highest level before +# we go to a FINAL alert +Monitor.Heartbeat.reachFINALcount=5 -# timings of the alerts (in seconds) -Monitor.Heartbeat.alertTimeout.NOTICE=60 -Monitor.Heartbeat.alertTimeout.WARNING=900 -Monitor.Heartbeat.alertTimeout.CAUTION=1800 -Monitor.Heartbeat.alertTimeout.CRITICAL=3600 -# this means it will reach a FINAL level, -# and thus the host will be removed from the monitor list, -# if the highest level alert is reached and occours this -# number of times. -# -# int this setup, that means 5 CRITICAL alerts would occour -# on a Heartbeat before reaching FINAL. 
-# If this value is wrong or not present, FINAL is never reached -# -Monitor.Heartbeat.reachFINALcount=5 ############################################################ #### Alerter Configuration @@ -210,17 +225,19 @@ Monitor.Heartbeat.reachFINALcount=5 # # alerters to run -Alerter.PluginsPackage=uk.ac.ukc.iscream.client.alerters +Alerter.PluginsPackage=uk.org.iscream.client.alerters Alerter.Plugins=EMail;IRC;WebFeeder; + # EMail alerter configuration -Alerter.EMail.level = CAUTION -Alerter.EMail.defaultDestList = dev@i-scream.org.uk +Alerter.EMail.level = WARNING +Alerter.EMail.destList = dev@i-scream.org.uk Alerter.EMail.sender = dev@i-scream.org.uk Alerter.EMail.smtpServer = mercury.ukc.ac.uk Alerter.EMail.subject = i-scream alert: %level% alert on %source% for %attributeName% -Alerter.EMail.message = The i-scream distributed central monitoring system has\nraised a %level% alert for the host %source%.\n\nThe value for %attributeName% of %value% has exceeded the\n%threshold% threshold value of %thresholdValue%.\n\nThe next alert (should one occur) will be sent in %timeTillNextAlert%. +Alerter.EMail.message = The i-scream distributed central monitoring system has\nraised a %level% alert for the host %source%.\n\nThe value for %attributeName% of %value% has exceeded the\n%threshold% threshold value of %thresholdValue%.\n\nThis alert was originally raised at %timeOfFirstAlert%,\nwhich was %timeSinceFirstAlert% ago.\n\nThe next alert (should one occur) will be sent in %timeTillNextAlert%. 
+ # IRC alerter configuration Alerter.IRC.level = OK Alerter.IRC.IRCServer = killigrew.ukc.ac.uk @@ -244,10 +261,27 @@ Alerter.IRC.versionCommand = version Alerter.IRC.helpCommand = help Alerter.IRC.rejectMessage = sorry, I don't understand your request + + ############################################################ #### WebFeeder Configuration ############################################################ +# Define these (one or both) to turn on the WebFeeders; +# comment them out to turn them off +WebFeeder.latestActive = true +WebFeeder.alertActive = true + +# Only pass on alerts past this level +WebFeeder.alertLevel = OK + +# Check period in seconds (for cleaning stale/old alerts) +WebFeeder.checkPeriod = 120 + +# Delete alerts older than this, in seconds. +# Files are only checked every WebFeeder.checkPeriod seconds. +WebFeeder.alertDeleteOlderThan = 300 + # Root Path WebFeeder.rootPath = /usr/local/proj/co600_10/webroot @@ -255,5 +289,9 @@ WebFeeder.rootPath = /usr/local/proj/co600_10/webroot WebFeeder.latestSubDir = latest WebFeeder.latestFileName = latest_data -############################################################ +# Alert Data +WebFeeder.alertSubDir = alert +WebFeeder.alertFileName = alert_data + +############################################################