--- projects/cms/source/server/build/etc/system.conf 2001/03/22 01:05:45 1.44 +++ projects/cms/source/server/build/etc/system.conf 2004/01/15 13:41:46 1.76 @@ -5,282 +5,363 @@ # configuration system has this file as its # root of configuration # +# This is a working sample configuration, which +# aims to provide a working "out of the box" +# setup, as well as some example configuration. +# # $Author: tdb $ -# $Id: system.conf,v 1.44 2001/03/22 01:05:45 tdb Exp $ +# $Id: system.conf,v 1.76 2004/01/15 13:41:46 tdb Exp $ # +# http://www.i-scream.org.uk/ +# + ############################################################ #### Configuration Files & Groups ############################################################ -# for computing machines -group.computing=Host.129.12.4.*; -group.rocks=Host.agate.ukc.ac.uk;Host.arkose.ukc.ac.uk;Host.basalt.ukc.ac.uk;Host.chalk.ukc.ac.uk;\ - Host.chert.ukc.ac.uk;Host.granite.ukc.ac.uk;Host.jade.ukc.ac.uk;Host.jasper.ukc.ac.uk;\ - Host.magnetite.ukc.ac.uk;Host.obsidian.ukc.ac.uk;Host.pumice.ukc.ac.uk;\ - Host.pyrite.ukc.ac.uk;Host.slate.ukc.ac.uk;Host.topaz.ukc.ac.uk; -group.compsoc=Host.compsoc1.ukc.ac.uk +# All components, such as hosts, will take their configuration +# as given in this file. Unless, that is, they have an extra +# configuration file specified here, or they're in a group +# that has a configuration file. If this is the case the extra +# configuration will override what is given in this file. -config.computing=computing.conf -config.rocks=rocks.conf -config.compsoc1=rocks.conf +# We can define an extra configuration file for any component +# of the system. This is done by specifying the following: +# +# config.component=someconfigfile.conf +# +# Component names will be defined in the documentation for +# the component. An example is that a host component is +# known by the name "Host.hostname" or "Host.ip". +# It is also possible to use the wildcard "*" in a +# hostname or IP address.
-# for library machines -group.library=Host.129.12.58.*; -config.library=library.conf +# This defines that server.example.com has some extra +# configuration in the file 'exserver.conf'. +config.Host.server.example.com=exserver.conf -# for SSB machines -group.ssb=Host.stue*.ukc.ac.uk; -config.ssb=ssb.conf +# All webservers are placed in a group named 'www'. +group.www=Host.www1.example.com;Host.www2.example.com +# Then we define some extra config for them. +config.www=webservers.conf -# mySQL configuration -config.mySQL=mySQL.conf +# Our workstations are in their own subnet. +group.workstations=Host.192.168.10.*; +# They can have their own config too. +config.workstations=workstations.conf +# Any hosts not specified by now have the default +# config as given in this file. + + ############################################################ #### Misc system-wide configuration ############################################################ -# the time (in seconds) that checks should be made for -# changes in configuration +# The ConfigurationProxy caches configuration for various +# parts of the server. This setting defines how often, in +# seconds, the proxy will check if the data it contains +# is still valid. ConfigurationProxy.updateTime=60 -# The time interval at which the Queue's should be monitored -Queue.MonitorInterval=15 -# The upper limit for a Queue +# The internal Queues can be monitored. This specifies +# the interval, in seconds, at which their state should +# be monitored. +Queue.MonitorInterval=300 +# The maximum size a Queue can be before items are +# dropped when new ones arrive. Queue.SizeLimit=1000 -# The removal algorithm +# And which items we should drop when the queue is full +# and new items arrive.
Choices are: +# RANDOM - drop a random item from the queue +# FIRST - drop the first item from the queue +# LAST - drop the last item from the queue +# DROP - drop the new item from the queue Queue.RemoveAlgorithm=FIRST +# The XMLCache caches data passing through the system. +# This setting specifies the time, in seconds, at which +# the cache's contents will be rotated. In real terms +# this means the minimum age an XMLPacket must reach +# before it is up for deletion from the cache. +XMLCache.cleanupPeriod=30 + ############################################################ #### FilterManager configuration ############################################################ -# The default port to listen for new host connections +# The FilterManager accepts connections from hosts +# upon startup. It provides them with configuration +# and details of which filter to talk to. This setting +# specifies which port (TCP) it listens on. FilterManager.listenPort=4567 +# An Access Control List for the FilterManager. One +# might consider something like this: +# +# FilterManager.ACL=DEFAULT:DENY;*.example.com:ALLOW +FilterManager.ACL=DEFAULT:ALLOW ############################################################ #### Filter configuration ############################################################ -# The default port for Filter's to listen on +# A Filter accepts data from hosts on a UDP port. It is +# specified here. Filter.UDPListenPort=4589 -Filter.TCPListenPort=4589 -# Plugin Information -Filter.PluginsPackage=uk.org.iscream.filter.plugins -Filter.Plugins=TypeChecker;EnforceEssentialData +# Filters contain plugins for checking the data passing +# through them. These plugins do the job of "filtering". +# This setting specifies the java package they're +# contained in. +Filter.PluginsPackage=uk.org.iscream.cms.server.filter.plugins +# And the names of the plugins to be loaded.
+Filter.Plugins=TypeChecker;EnforceEssentialData;SourceChecker -# The name for the root filter -RootFilter.name=root - -# The default parent filter (should almost always be the same as Filter.rootFilter) +# Every Filter, except the root, needs a parent to +# send its data to. This is usually the root filter, +# although in more complex setups it might be another +# filter - giving a tree structure. Filter.parentFilter=root +# A Filter can accept data through UDP, or from +# an upstream Filter (CORBA). These methods can be +# individually turned off on a filter. The default +# is to have them all on. +# (comment or set to 0 to disable) +Filter.ActivateUDPReader=1 +Filter.ActivateCORBAReader=1 +# We can also specify Access Control Lists for the +# UDP input of the Filters. One might want something +# restrictive like this: +# +# Filter.UDPACL=DEFAULT:DENY;*.example.com:ALLOW +Filter.UDPACL=DEFAULT:ALLOW +# A Filter plugin called the SourceChecker verifies +# that packets come from permitted hosts. It is +# given an ACL to check the hosts against. A possible +# ACL might be: +# +# Filter.SourceCheckerPluginACL=DEFAULT:DENY;*.example.com:ALLOW +Filter.SourceCheckerPluginACL=DEFAULT:ALLOW + + ############################################################ -#### Root filter interfaces configuration +#### Root filter configuration ############################################################ -# Comment either of these (or delete them) and -# that interface won't be started -RootFilter.realtimeInterfaceName=realtimeclients -RootFilter.dbInterfaceName=database +# The special filter known as the "Root Filter" needs +# a name within the system. +RootFilter.name=root - ############################################################ #### Client interface configuration ############################################################ -# The port for the client interface to listen on +# The client interface listens for connections from clients +# on a TCP port. It is specified here.
ClientInterface.listenPort=4510 +# The client interface needs to connect to another one to +# receive data. There is a special client interface in the +# root filter which is the top of the tree. +ClientInterface.parent=root +# The Client Interface has two channels, control and data. +# Each of these can have an ACL for allowing connections. +# As per previous examples in this configuration file they +# could be set restrictively, but we'll leave them open +# by default. +ClientInterface.TCPControlChannelACL=DEFAULT:ALLOW +ClientInterface.TCPDataChannelACL=DEFAULT:ALLOW + ############################################################ +#### LocalClient Configuration +############################################################ + +# The parent for the Local Client +Client.parent=clientinterface1 + + +############################################################ #### Host Configuration ############################################################ -# The default filter for a new host -Host.filter=computingFilter; +# A host can use any filter. We'll set the default filter +# for all hosts to be 'filter1'. This can be a list. +Host.filter=filter1; -# The default update times for a new host -Host.UDPUpdateTime=10 -Host.TCPUpdateTime=60 -Host.AveragerUpdateTime=5 +# Hosts send data every UDPUpdateTime, in seconds. +Host.UDPUpdateTime=60 -# The services checks to run on a host -Host.serviceChecksPackage=uk.org.iscream.filter.plugins +# How long a host's config is valid for (a TTL effectively). +# After this time, in seconds, has passed a host should +# consider its configuration invalid and check for a new +# configuration as soon as it can. +Host.ConfigTTL=3600 +# We can run service checks on a host. The java package +# which contains them is specified here.
+Host.serviceChecksPackage=uk.org.iscream.cms.server.filter.plugins ############################################################ #### Monitor Configuration ############################################################ -# the monitors to run -Monitor.PluginsPackage=uk.org.iscream.client.monitors -Monitor.Plugins=CPU;Disk;Memory;Swap;Services;Heartbeat;WebFeeder; -#Monitor.Plugins=CPU;WebFeeder; -#Monitor.Plugins= +# Monitors allow data to be watched for possible problems. +# This setting specifies which java package they're in. +Monitor.PluginsPackage=uk.org.iscream.cms.server.client.monitors +# And which ones should be loaded. +Monitor.Plugins=CPU;Load;Process;Disk;Memory;Swap;Services;\ + Heartbeat;Queue;UserCount;WebFeeder;DiskIO;\ + NetIO;Paging; +# Alerts timeout after a given time in seconds and go up +# to the next level. These can be overridden for monitors +# on an individual basis. +Monitor.alertTimeout.NOTICE=60 +Monitor.alertTimeout.WARNING=900 +Monitor.alertTimeout.CAUTION=1800 +Monitor.alertTimeout.CRITICAL=3600 -## CPU monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. -Monitor.CPU.threshold.LOWER=90 -Monitor.CPU.threshold.UPPER=99 +# Is monitoring enabled? (comment or set to 0 to disable) +Monitor.enable=1 -# timings of the alerts (in seconds) -Monitor.CPU.alertTimeout.NOTICE=60 -Monitor.CPU.alertTimeout.WARNING=900 -Monitor.CPU.alertTimeout.CAUTION=1800 -Monitor.CPU.alertTimeout.CRITICAL=3600 +## Threshold values +# +# Here we specify the threshold for each monitor. When +# a threshold is broken an alert is raised. If the +# UPPER threshold is broken the alertTimeout values +# given above are halved to make the alert escalate +# quicker. 
+# CPU monitor threshold values +Monitor.CPU.threshold.LOWER=90 +Monitor.CPU.threshold.UPPER=95 +# enable this monitor (comment or set to 0 to disable) +Monitor.CPU.enable=1 -## Load monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. +# Load monitor threshold values Monitor.Load.threshold.LOWER=10.0 Monitor.Load.threshold.UPPER=20.0 +# enable this monitor (comment or set to 0 to disable) +Monitor.Load.enable=1 -# timings of the alerts (in seconds) -Monitor.Load.alertTimeout.NOTICE=60 -Monitor.Load.alertTimeout.WARNING=900 -Monitor.Load.alertTimeout.CAUTION=1800 -Monitor.Load.alertTimeout.CRITICAL=3600 +# Paging monitor threshold values +Monitor.Paging.threshold.LOWER=1000 +Monitor.Paging.threshold.UPPER=5000 +# enable this monitor (comment or set to 0 to disable) +Monitor.Paging.enable=1 +# Process Count threshold values +Monitor.Process.threshold.LOWER=500 +Monitor.Process.threshold.UPPER=1000 +# enable this monitor (comment or set to 0 to disable) +Monitor.Process.enable=1 -## Process Count monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. -Monitor.Process.threshold.LOWER=1000 -Monitor.Process.threshold.UPPER=1500 - -# timings of the alerts (in seconds) -Monitor.Process.alertTimeout.NOTICE=60 -Monitor.Process.alertTimeout.WARNING=900 -Monitor.Process.alertTimeout.CAUTION=1800 -Monitor.Process.alertTimeout.CRITICAL=3600 - - -## Memory monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. 
+# Memory monitor threshold values Monitor.Memory.threshold.LOWER=80 Monitor.Memory.threshold.UPPER=90 +# Whether to include 'cache' in the 'free' value +# (1 is true, anything else is false) +Monitor.Memory.useCacheAsFree=1 +# enable this monitor (comment or set to 0 to disable) +Monitor.Memory.enable=1 -# timings of the alerts (in seconds) -Monitor.Memory.alertTimeout.NOTICE=60 -Monitor.Memory.alertTimeout.WARNING=900 -Monitor.Memory.alertTimeout.CAUTION=1800 -Monitor.Memory.alertTimeout.CRITICAL=3600 - - -## Swap monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. +# Swap monitor threshold values Monitor.Swap.threshold.LOWER=80 Monitor.Swap.threshold.UPPER=90 +# enable this monitor (comment or set to 0 to disable) +Monitor.Swap.enable=1 -# timings of the alerts (in seconds) -Monitor.Swap.alertTimeout.NOTICE=60 -Monitor.Swap.alertTimeout.WARNING=900 -Monitor.Swap.alertTimeout.CAUTION=1800 -Monitor.Swap.alertTimeout.CRITICAL=3600 +# UserCount monitor threshold values +Monitor.UserCount.threshold.LOWER=100 +Monitor.UserCount.threshold.UPPER=200 +# enable this monitor (comment or set to 0 to disable) +Monitor.UserCount.enable=1 - -## Disk monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. 
+# Disk monitor threshold values Monitor.Disk.threshold.LOWER=90 Monitor.Disk.threshold.UPPER=95 +Monitor.DiskInode.threshold.LOWER=90 +Monitor.DiskInode.threshold.UPPER=95 +# Thresholds given are PERCENTAGE or VALUE +# where PERCENTAGE is a percentage of disk in use +# and VALUE is an absolute value of space free +Monitor.Disk.thresholdMeasure=PERCENTAGE +Monitor.DiskInode.thresholdMeasure=PERCENTAGE +# enable this monitor (comment or set to 0 to disable) +Monitor.Disk.enable=1 +Monitor.DiskInode.enable=1 -# timings of the alerts (in seconds) -Monitor.Disk.alertTimeout.NOTICE=60 -Monitor.Disk.alertTimeout.WARNING=900 -Monitor.Disk.alertTimeout.CAUTION=1800 -Monitor.Disk.alertTimeout.CRITICAL=3600 +# DiskIO monitor threshold values (in bytes) +Monitor.DiskIO.rbytes.threshold.LOWER=10000000 +Monitor.DiskIO.rbytes.threshold.UPPER=50000000 +Monitor.DiskIO.wbytes.threshold.LOWER=10000000 +Monitor.DiskIO.wbytes.threshold.UPPER=50000000 +# enable this monitor (comment or set to 0 to disable) +Monitor.DiskIO.enable=1 +# NetIO monitor threshold values (in bytes) +Monitor.NetIO.rxbytes.threshold.LOWER=10000000 +Monitor.NetIO.rxbytes.threshold.UPPER=50000000 +Monitor.NetIO.txbytes.threshold.LOWER=10000000 +Monitor.NetIO.txbytes.threshold.UPPER=50000000 +# enable this monitor (comment or set to 0 to disable) +Monitor.NetIO.enable=1 -## Services monitor configuration -# threshold values +# Queue monitor threshold values +Monitor.Queue.threshold.LOWER=50 +Monitor.Queue.threshold.UPPER=80 +# Thresholds given are PERCENTAGE or VALUE +# where PERCENTAGE is a percentage of the queue in use +# and VALUE is an absolute value of free space +Monitor.Queue.thresholdMeasure=PERCENTAGE +# enable this monitor (comment or set to 0 to disable) +Monitor.Queue.enable=1 + +# Services monitor threshold values # # THE SERVICES MONITOR USES A HACK # WHEREBY IT USES THE SERVICE STATUS -# AS THE THRESHOLD LEVEL. THIS SHOULD +# AS THE THRESHOLD LEVEL. THIS SHOULD # BE LOOKED AT! 
# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. -#Monitor.Services.threshold.LOWER=120 -#Monitor.Services.threshold.LOWER=300 - -# timings of the alerts (in seconds) -Monitor.Services.alertTimeout.NOTICE=60 -Monitor.Services.alertTimeout.WARNING=900 -Monitor.Services.alertTimeout.CAUTION=1800 -Monitor.Services.alertTimeout.CRITICAL=3600 - -# Make sure we shut up eventually +# how many times we should be at the highest level before +# we go to a FINAL alert Monitor.Services.reachFINALcount=5 - -## Heartbeat monitor configuration -# threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. -Monitor.Heartbeat.checkPeriod=50 +# Heartbeat threshold values # these are values in seconds from when the heartbeat # was *expected* to arrive (i.e. last + heartbeat period) -Monitor.Heartbeat.threshold.LOWER=120 +Monitor.Heartbeat.threshold.LOWER=180 Monitor.Heartbeat.threshold.UPPER=300 - -# timings of the alerts (in seconds) -Monitor.Heartbeat.alertTimeout.NOTICE=60 -Monitor.Heartbeat.alertTimeout.WARNING=900 -Monitor.Heartbeat.alertTimeout.CAUTION=1800 -Monitor.Heartbeat.alertTimeout.CRITICAL=3600 - -# this means it will reach a FINAL level, -# and thus the host will be removed from the monitor list, -# if the highest level alert is reached and occours this -# number of times. -# -# int this setup, that means 5 CRITICAL alerts would occour -# on a Heartbeat before reaching FINAL. -# If this value is wrong or not present, FINAL is never reached -# +# this is how often we will run a "check" of heartbeats +Monitor.Heartbeat.checkPeriod=50 +# how many times we should be at the highest level before +# we go to a FINAL alert Monitor.Heartbeat.reachFINALcount=5 +# list of hosts we expect to have at startup +# this ensures we notice hosts that aren't running, not +# just those that have gone down recently.
+#Monitor.Heartbeat.initialHosts=importantserver.example.com;myserver.example.com +# enable this monitor (comment or set to 0 to disable) +Monitor.Heartbeat.enable=1 + ############################################################ #### Alerter Configuration ############################################################ -# The following var's are understood by ALL alerters for messages +# The following variables are understood by ALL alerters +# for messages. They will be substituted for their +# appropriate value. # # %level% - the alert level (eg, WARNING) # %threshold% - the threshold broken (eg, LOWER) @@ -293,33 +374,51 @@ Monitor.Heartbeat.reachFINALcount=5 # %timeOfFirstAlert% - the time the first alert was sent # -# alerters to run -Alerter.PluginsPackage=uk.org.iscream.client.alerters -#Alerter.Plugins=EMail;IRC;WebFeeder; -Alerter.Plugins=IRC;WebFeeder; -#Alerter.Plugins=WebFeeder; -#Alerter.Plugins= +# The java package containing the alerters +Alerter.PluginsPackage=uk.org.iscream.cms.server.client.alerters +# And the names of the ones to activate +# You might want to turn them all on, but we only activate +# two basic ones by default. 
+# Alerter.Plugins=EMail;IRC;WebFeeder;Logging; +Alerter.Plugins=WebFeeder;Logging; -# EMail alerter configuration +# Configuration for the EMail alerter +# The level alerts have to bypass to be e-mailed out Alerter.EMail.level = WARNING -Alerter.EMail.destList = dev@i-scream.org.uk -Alerter.EMail.sender = dev@i-scream.org.uk -Alerter.EMail.smtpServer = mercury.ukc.ac.uk +# A list of people to send alerts to +Alerter.EMail.destList = me@example.com +# The sender of the e-mail +Alerter.EMail.sender = me@example.com +# The SMTP server to send alerts to +Alerter.EMail.smtpServer = smtp.example.com +# The subject line of the e-mail Alerter.EMail.subject = i-scream alert: %level% alert on %source% for %attributeName% +# The message in the e-mail Alerter.EMail.message = The i-scream distributed central monitoring system has\nraised a %level% alert for the host %source%.\n\nThe value for %attributeName% of %value% has exceeded the\n%threshold% threshold value of %thresholdValue%.\n\nThis alert was originally raised at %timeOfFirstAlert%,\nwhich was %timeSinceFirstAlert% ago.\n\nThe next alert (should one occur) will be sent in %timeTillNextAlert%. 
- -# IRC alerter configuration +# Configuration for the IRC alerter +# The level alerts have to bypass to be sent Alerter.IRC.level = OK -Alerter.IRC.IRCServer = killigrew.ukc.ac.uk +# The IRC server name +Alerter.IRC.IRCServer = irc.example.com +# and its port Alerter.IRC.IRCPort = 6667 +# A list of nicks to try and be Alerter.IRC.nickList = iscreamBot;_iscreamBot;i-screamBot +# Username to be Alerter.IRC.user = i-scream -Alerter.IRC.comment = i-scream alerting bot +# Responses to CTCP prods +Alerter.IRC.comment = i-scream alerting bot (based on PircBot) +Alerter.IRC.finger = i-scream alerting bot (based on PircBot) +# Channel to join Alerter.IRC.channel = #i-scream +# Message to use when sending an alert Alerter.IRC.message = %level%: %attributeName% on %source% has passed %threshold%(%thresholdValue%) threshold with %value% - time till next alert (should one occur), %timeTillNextAlert% +# How long to wait before reconnecting Alerter.IRC.reconnectDelay = 30 +# Notice to send when starting up Alerter.IRC.startupNotice = i-scream alerting bot activated +# Commands the bot will recognise Alerter.IRC.stopCommand = stop alerts Alerter.IRC.startCommand = start alerts Alerter.IRC.lastAlertCommand = last alert @@ -330,37 +429,59 @@ Alerter.IRC.uptimeCommand = uptime Alerter.IRC.timeSinceLastAlertCommand = time since last alert Alerter.IRC.versionCommand = version Alerter.IRC.helpCommand = help +# Message to send when the bot is addressed and +# doesn't understand the command Alerter.IRC.rejectMessage = sorry, I don't understand your request +# Configuration for the Logging Alerter +# This alerter writes alerts to the i-scream logfile.
+# The level alerts have to bypass to be logged +Alerter.Logging.level = WARNING +# The message to use when logging the alert +Alerter.Logging.message = %level%: %attributeName% on %source% has passed %threshold%(%thresholdValue%) threshold with %value% + + ############################################################ #### WebFeeder Configuration ############################################################ -# Define these (one or both_ to turn on the WebFeeder's, -# comment it to turn them off +# The WebFeeder drops alerts and latest data into a +# directory for the web interface to use. It is essentially +# the link between the web interface and the server. + +# Set these to something to tell the WebFeeder to drop +# latest data, alerts, or both. Comment them to disable. WebFeeder.latestActive = true WebFeeder.alertActive = true -# Only pass on alerts past this level -WebFeeder.alertLevel = OK +# We can make the WebFeeder only drop alerts that go +# past a certain level. The default is "OK", which makes +# all alerts get passed on. +Alerter.WebFeeder.level = OK -# Check period in seconds (for cleaning stale/old alerts) +# The WebFeeder cleans out old and stale alerts when it +# performs a check. This setting is the period, in seconds, +# at which a check happens. WebFeeder.checkPeriod = 120 # Delete alerts older than this, in seconds. -# Files are only check every WebFeeder.checkPeriod seconds. WebFeeder.alertDeleteOlderThan = 300 -# Root Path -WebFeeder.rootPath = /usr/local/proj/co600_10/webroot +# Data will be dropped to a subdirectory (specified +# below) of this directory. +WebFeeder.rootPath = /web/i-scream -# Latest Data +# SubDir is the directory below rootPath which latest +# data will be placed in. Below that a directory will +# be created with the machine name, and a file named +# FileName (specified here) will be created containing +# the data.
WebFeeder.latestSubDir = latest WebFeeder.latestFileName = latest_data -# Alert Data +# As above, but for the alerts. WebFeeder.alertSubDir = alert WebFeeder.alertFileName = alert_data -############################################################ +############################################################