--- projects/cms/source/server/build/etc/system.conf 2002/05/28 17:11:01 1.67 +++ projects/cms/source/server/build/etc/system.conf 2004/08/01 10:40:39 1.77 @@ -5,243 +5,339 @@ # configuration system has this file as its # root of configuration # +# This is a working sample configuration, which +# aims to provide a working "out of the box" +# setup, as well as some example configuration. +# # $Author: tdb $ -# $Id: system.conf,v 1.67 2002/05/28 17:11:01 tdb Exp $ +# $Id: system.conf,v 1.77 2004/08/01 10:40:39 tdb Exp $ # +# http://www.i-scream.org/ +# + ############################################################ #### Configuration Files & Groups ############################################################ -# for computing machines -group.computing=Host.129.12.4.*; -group.rocks=Host.agate.ukc.ac.uk;Host.arkose.ukc.ac.uk;Host.basalt.ukc.ac.uk;Host.chalk.ukc.ac.uk;\ - Host.chert.ukc.ac.uk;Host.granite.ukc.ac.uk;Host.jade.ukc.ac.uk;Host.jasper.ukc.ac.uk;\ - Host.magnetite.ukc.ac.uk;Host.obsidian.ukc.ac.uk;Host.pumice.ukc.ac.uk;\ - Host.pyrite.ukc.ac.uk;Host.slate.ukc.ac.uk;Host.topaz.ukc.ac.uk; -group.compsoc=Host.compsoc1.ukc.ac.uk +# All components, such as hosts, will take their configuration +# as given in this file. Unless, that is, they have an extra +# configuration file specified here, or they're in a group +# that has a configuration file. If this is the case the extra +# configuration will override what is given in this file. -config.computing=computing.conf -config.rocks=rocks.conf -config.compsoc1=rocks.conf +# We can define an extra configuration file for any component +# of the system. This is done by specifying the following: +# +# config.component=someconfigfile.conf +# +# Component names will be defined in the documentation for +# the component. An example is that a host component is +# known by the name "Host.hostname" or "Host.ip". +# It is also possible to use the wildcard "*" in a +# hostname or IP address. 
-# for library machines -group.library=Host.129.12.58.*; -config.library=library.conf +# This defines that server.example.com has some extra +# configuration in the file 'exserver.conf'. +config.Host.server.example.com=exserver.conf -# for SSB machines -group.ssb=Host.stue*.ukc.ac.uk; -config.ssb=ssb.conf +# All webservers are placed in a group named 'www'. +group.www=Host.www1.example.com;Host.www2.example.com +# Then we define some extra config for them. +config.www=webservers.conf -# mySQL configuration -config.mySQL=mySQL.conf +# Our workstations are in their own subnet. +group.workstations=Host.192.168.10.*; +# They can have their own config too. +config.workstations=workstations.conf +# Any hosts not specified by now have the default +# config as given in this file. + + ############################################################ #### Misc system-wide configuration ############################################################ -# the time (in seconds) that checks should be made for -# changes in configuration +# The ConfigurationProxy caches configuration for various +# parts of the server. This setting defines how often, in +# seconds, the proxy will check if the data it contains +# is still valid. ConfigurationProxy.updateTime=60 -# The time interval at which the Queue's should be monitored -Queue.MonitorInterval=15 -# The upper limit for a Queue +# The internal Queues can be monitored. This specifies +# the interval, in seconds, at which their state should +# be monitored. +Queue.MonitorInterval=300 +# The maximum size a Queue can be before items are +# dropped when new ones arrive. Queue.SizeLimit=1000 -# The removal algorithm +# And which items we should drop when the queue is full +# and new items arrive. 
Choices are: +# RANDOM - drop a random item from the queue +# FIRST - drop the first item from the queue +# LAST - drop the last item from the queue +# DROP - drop the new item from the queue Queue.RemoveAlgorithm=FIRST +# The XMLCache caches data passing through the system. +# This setting specifies the time, in seconds, at which +# the cache's contents will be rotated. In real terms +# this means the minimum age at which an XMLPacket can +# be before it is up for deletion from the cache. +XMLCache.cleanupPeriod=30 + ############################################################ #### FilterManager configuration ############################################################ -# The default port to listen for new host connections +# The FilterManager accepts connections from hosts +# upon startup. It provides them with configuration +# and details of which filter to talk to. This setting +# specifies which port (TCP) it listens on. FilterManager.listenPort=4567 -# Access control list -FilterManager.ACL=DEFAULT:DENY;*.ukc.ac.uk:ALLOW;129.12.*:ALLOW +# An Access Control List for the FilterManager. One +# might consider something like this: +# +# FilterManager.ACL=DEFAULT:DENY;*.example.com:ALLOW +FilterManager.ACL=DEFAULT:ALLOW - ############################################################ #### Filter configuration ############################################################ -# The default port for Filter's to listen on +# A Filter accepts data from hosts on a UDP port. It is +# specified here. Filter.UDPListenPort=4589 -Filter.TCPListenPort=4589 -# Plugin Information +# Filters contain plugins for checking the data passing +# through them. These plugins do the job of "filtering". +# This setting specifies the java package they're +# contained in. Filter.PluginsPackage=uk.org.iscream.cms.server.filter.plugins -Filter.Plugins=TypeChecker;EnforceEssentialData;SourceChecker;KeyChecker +# And the names of the plugins to be loaded. 
+Filter.Plugins=TypeChecker;EnforceEssentialData;SourceChecker -# The name for the root filter -RootFilter.name=root - -# The default parent filter (should almost always be the same as Filter.rootFilter) +# Every Filter, except the root, needs a parent to +# send their data to. This is usually the root filter, +# although in more complex setups it might be another +# filter - giving a tree structure. Filter.parentFilter=root -# The input methods to activate +# A Filter can accept data through UDP, or from +# an upstream Filter (CORBA). These methods can be +# individually turned off on a filter. The default +# is to have them all on. # (comment or set to 0 to disable) -Filter.ActivateTCPReader=1 Filter.ActivateUDPReader=1 Filter.ActivateCORBAReader=1 -# Access control list for the TCP and UDP inputs -Filter.TCPACL=DEFAULT:DENY;*.ukc.ac.uk:ALLOW;129.12.*:ALLOW -Filter.UDPACL=DEFAULT:DENY;*.ukc.ac.uk:ALLOW;129.12.*:ALLOW +# We can also specify Access Control Lists for the +# UDP input of the Filters. One might want something +# restrictive like this: +# +# Filter.UDPACL=DEFAULT:DENY;*.example.com:ALLOW +Filter.UDPACL=DEFAULT:ALLOW -# Access control list for the SourceChecker plugin -# - packets not permitted by this ACL will be filtered -Filter.SourceCheckerPluginACL=DEFAULT:DENY;*.ukc.ac.uk:ALLOW;129.12.*:ALLOW +# A Filter plugin called the SourceChecker verifies +# that packets come from permitted hosts. It is +# given an ACL to check the hosts against. A possible +# ACL might be: +# +# Filter.SourceCheckerPluginACL=DEFAULT:DENY;*.example.com:ALLOW +Filter.SourceCheckerPluginACL=DEFAULT:ALLOW -# Enforce host authentication? 
-# (comment or set to 0 to disable) -Filter.EnforceHostAuth=1 -# Length of keys to use in authentication -Filter.KeyLength=15 - - - ############################################################ -#### Root filter interfaces configuration +#### Root filter configuration ############################################################ -# Comment either of these (or delete them) and -# that interface won't be started -RootFilter.realtimeInterfaceName=realtimeclients -RootFilter.dbInterfaceName=database +# The special filter known as the "Root Filter" needs +# a name within the system. +RootFilter.name=root - ############################################################ #### Client interface configuration ############################################################ -# The port for the client interface to listen on +# The client interface listens for connections from clients +# on a TCP port. It is specified here. ClientInterface.listenPort=4510 -# The name for the client interface -ClientInterface.name=realtimeclients -# Access Control List for TCP Clients -ClientInterface.TCPControlChannelACL=DEFAULT:DENY;*.ukc.ac.uk:ALLOW;129.12.*:ALLOW -ClientInterface.TCPDataChannelACL=DEFAULT:DENY;*.ukc.ac.uk:ALLOW;129.12.*:ALLOW +# The client interface needs to connect to another one to +# receive data. There is a special client interface in the +# root filter which is the top of the tree. +ClientInterface.parent=root +# The Client Interface has two channels, control and data. +# Each of these can have an ACL for allowing connections. 
+# As per previous examples in this configuration file they +# could be set restrictively, but we'll leave them open +# by default +ClientInterface.TCPControlChannelACL=DEFAULT:ALLOW +ClientInterface.TCPDataChannelACL=DEFAULT:ALLOW ############################################################ -#### Database interface configuration +#### LocalClient Configuration ############################################################ -# The name for the Database interface -DBInterface.name=database +# The parent for the Local Client +Client.parent=clientinterface1 - ############################################################ #### Host Configuration ############################################################ -# The default filter for a new host -Host.filter=computingFilter; +# A host can use any filter. We'll set the default filter +# for all hosts to be 'filter1'. This can be a list. +Host.filter=filter1; -# The default update times for a new host -Host.UDPUpdateTime=10 -Host.TCPUpdateTime=60 +# Hosts send data every UDPUpdateTime, in seconds. +Host.UDPUpdateTime=60 -# The services checks to run on a host +# How long a host's config is valid for (a TTL effectively). +# After this time, in seconds, has passed, a host should +# consider its configuration invalid and check for a new +# configuration as soon as it can. +Host.ConfigTTL=3600 + +# We can run service checks on a host. The java package +# which contains them is specified here. Host.serviceChecksPackage=uk.org.iscream.cms.server.filter.plugins - ############################################################ #### Monitor Configuration ############################################################ -## The monitors to run +# Monitors allow data to be watched for possible problems. +# This setting specifies which java package they're in. Monitor.PluginsPackage=uk.org.iscream.cms.server.client.monitors +# And which ones should be loaded. 
Monitor.Plugins=CPU;Load;Process;Disk;Memory;Swap;Services;\ - Heartbeat;Queue;UserCount;WebFeeder; + Heartbeat;Queue;UserCount;WebFeeder;DiskIO;\ + NetIO;Paging; -## Default alert timeouts -# these can be override on a per-monitor basis -# timings of the alerts (in seconds) +# Alerts timeout after a given time in seconds and go up +# to the next level. These can be overridden for monitors +# on an individual basis. Monitor.alertTimeout.NOTICE=60 Monitor.alertTimeout.WARNING=900 Monitor.alertTimeout.CAUTION=1800 Monitor.alertTimeout.CRITICAL=3600 +# Is monitoring enabled? (comment or set to 0 to disable) +Monitor.enable=1 + ## Threshold values -# -# note: if the UPPER threshold is broken, -# the alertTimeout values are halved. -# This is to escalate the alert quicker. # +# Here we specify the threshold for each monitor. When +# a threshold is broken an alert is raised. If the +# UPPER threshold is broken the alertTimeout values +# given above are halved to make the alert escalate +# quicker. 
# CPU monitor threshold values Monitor.CPU.threshold.LOWER=90 -Monitor.CPU.threshold.UPPER=99 +Monitor.CPU.threshold.UPPER=95 +# enable this monitor (comment or set to 0 to disable) +Monitor.CPU.enable=1 - # Load monitor threshold values Monitor.Load.threshold.LOWER=10.0 Monitor.Load.threshold.UPPER=20.0 +# enable this monitor (comment or set to 0 to disable) +Monitor.Load.enable=1 +# Paging monitor threshold values +Monitor.Paging.threshold.LOWER=1000 +Monitor.Paging.threshold.UPPER=5000 +# enable this monitor (comment or set to 0 to disable) +Monitor.Paging.enable=1 # Process Count threshold values -Monitor.Process.threshold.LOWER=1000 -Monitor.Process.threshold.UPPER=1500 +Monitor.Process.threshold.LOWER=500 +Monitor.Process.threshold.UPPER=1000 +# enable this monitor (comment or set to 0 to disable) +Monitor.Process.enable=1 - # Memory monitor threshold values Monitor.Memory.threshold.LOWER=80 Monitor.Memory.threshold.UPPER=90 # Whether to include 'cache' in the 'free' value # (1 is true, anything else is false) Monitor.Memory.useCacheAsFree=1 +# enable this monitor (comment or set to 0 to disable) +Monitor.Memory.enable=1 - # Swap monitor threshold values Monitor.Swap.threshold.LOWER=80 Monitor.Swap.threshold.UPPER=90 +# enable this monitor (comment or set to 0 to disable) +Monitor.Swap.enable=1 - # UserCount monitor threshold values Monitor.UserCount.threshold.LOWER=100 Monitor.UserCount.threshold.UPPER=200 +# enable this monitor (comment or set to 0 to disable) +Monitor.UserCount.enable=1 - # Disk monitor threshold values Monitor.Disk.threshold.LOWER=90 Monitor.Disk.threshold.UPPER=95 +Monitor.DiskInode.threshold.LOWER=90 +Monitor.DiskInode.threshold.UPPER=95 # Thresholds given are PERCENTAGE or VALUE # where PERCENTAGE is a percentage of disk in use # and VALUE is an absolute value of space free Monitor.Disk.thresholdMeasure=PERCENTAGE +Monitor.DiskInode.thresholdMeasure=PERCENTAGE +# enable this monitor (comment or set to 0 to disable) 
+Monitor.Disk.enable=1 +Monitor.DiskInode.enable=1 +# DiskIO monitor threshold values (in bytes) +Monitor.DiskIO.rbytes.threshold.LOWER=10000000 +Monitor.DiskIO.rbytes.threshold.UPPER=50000000 +Monitor.DiskIO.wbytes.threshold.LOWER=10000000 +Monitor.DiskIO.wbytes.threshold.UPPER=50000000 +# enable this monitor (comment or set to 0 to disable) +Monitor.DiskIO.enable=1 +# NetIO monitor threshold values (in bytes) +Monitor.NetIO.rxbytes.threshold.LOWER=10000000 +Monitor.NetIO.rxbytes.threshold.UPPER=50000000 +Monitor.NetIO.txbytes.threshold.LOWER=10000000 +Monitor.NetIO.txbytes.threshold.UPPER=50000000 +# enable this monitor (comment or set to 0 to disable) +Monitor.NetIO.enable=1 + # Queue monitor threshold values Monitor.Queue.threshold.LOWER=50 Monitor.Queue.threshold.UPPER=80 # Thresholds given are PERCENTAGE or VALUE -# where PERCENTAGE is a percentage of disk in use -# and VALUE is an absolute value of space free +# where PERCENTAGE is a percentage of the queue in use +# and VALUE is an absolute value of free space Monitor.Queue.thresholdMeasure=PERCENTAGE +# enable this monitor (comment or set to 0 to disable) +Monitor.Queue.enable=1 - # Services monitor threshold values # # THE SERVICES MONITOR USES A HACK # WHEREBY IT USES THE SERVICE STATUS -# AS THE THRESHOLD LEVEL. THIS SHOULD +# AS THE THRESHOLD LEVEL. THIS SHOULD # BE LOOKED AT! # # how many times we should be at the highest level before # we go to a FINAL alert Monitor.Services.reachFINALcount=5 - # Heartbeat threshold values # these are values in seconds from when the heartbeat # was *expected* to arrive (ie. last + hearbeat period) @@ -255,14 +351,17 @@ Monitor.Heartbeat.reachFINALcount=5 # list of hosts we expect to have at startup # this ensures we notice hosts that aren't running, not # just those that have gone down recently. 
-Monitor.Heartbeat.initialHosts=raptor.ukc.ac.uk;myrtle.ukc.ac.uk;kestrel.ukc.ac.uk +#Monitor.Heartbeat.initialHosts=importantserver.example.com;myserver.example.com +# enable this monitor (comment or set to 0 to disable) +Monitor.Heartbeat.enable=1 - ############################################################ #### Alerter Configuration ############################################################ -# The following var's are understood by ALL alerters for messages +# The following variables are understood by ALL alerters +# for messages. They will be substituted for their +# appropriate value. # # %level% - the alert level (eg, WARNING) # %threshold% - the threshold broken (eg, LOWER) @@ -275,32 +374,51 @@ Monitor.Heartbeat.initialHosts=raptor.ukc.ac.uk;myrtle # %timeOfFirstAlert% - the time the first alert was sent # -# alerters to run +# The java package containing the alerters Alerter.PluginsPackage=uk.org.iscream.cms.server.client.alerters -Alerter.Plugins=EMail;IRC;WebFeeder;Logging; +# And the names of the ones to activate +# You might want to turn them all on, but we only activate +# two basic ones by default. 
+# Alerter.Plugins=EMail;IRC;WebFeeder;Logging; +Alerter.Plugins=WebFeeder;Logging; - -# EMail alerter configuration +# Configuration for the EMail alerter +# The level alerts have to bypass to be e-mailed out Alerter.EMail.level = WARNING -Alerter.EMail.destList = dev@i-scream.org.uk -Alerter.EMail.sender = dev@i-scream.org.uk -Alerter.EMail.smtpServer = mercury.ukc.ac.uk +# A list of people to send alerts to +Alerter.EMail.destList = me@example.com +# The sender of the e-mail +Alerter.EMail.sender = me@example.com +# The SMTP server to send alerts to +Alerter.EMail.smtpServer = smtp.example.com +# The subject line of the e-mail Alerter.EMail.subject = i-scream alert: %level% alert on %source% for %attributeName% +# The message in the e-mail Alerter.EMail.message = The i-scream distributed central monitoring system has\nraised a %level% alert for the host %source%.\n\nThe value for %attributeName% of %value% has exceeded the\n%threshold% threshold value of %thresholdValue%.\n\nThis alert was originally raised at %timeOfFirstAlert%,\nwhich was %timeSinceFirstAlert% ago.\n\nThe next alert (should one occur) will be sent in %timeTillNextAlert%. 
- -# IRC alerter configuration +# Configuration for the IRC alerter +# The level alerts have to bypass to be sent Alerter.IRC.level = OK -Alerter.IRC.IRCServer = killigrew.ukc.ac.uk +# The IRC server name +Alerter.IRC.IRCServer = irc.example.com +# and its port Alerter.IRC.IRCPort = 6667 +# A list of nicks to try and be Alerter.IRC.nickList = iscreamBot;_iscreamBot;i-screamBot +# Username to be Alerter.IRC.user = i-scream +# Responses to CTCP prods Alerter.IRC.comment = i-scream alerting bot (based on PircBot) Alerter.IRC.finger = i-scream alerting bot (based on PircBot) +# Channel to join Alerter.IRC.channel = #i-scream +# Message to use when sending an alert Alerter.IRC.message = %level%: %attributeName% on %source% has passed %threshold%(%thresholdValue%) threshold with %value% - time till next alert (should one occur), %timeTillNextAlert% +# How long to wait before reconnecting Alerter.IRC.reconnectDelay = 30 +# Notice to send when starting up Alerter.IRC.startupNotice = i-scream alerting bot activated +# Commands the bot will recognise Alerter.IRC.stopCommand = stop alerts Alerter.IRC.startCommand = start alerts Alerter.IRC.lastAlertCommand = last alert @@ -311,42 +429,57 @@ Alerter.IRC.uptimeCommand = uptime Alerter.IRC.timeSinceLastAlertCommand = time since last alert Alerter.IRC.versionCommand = version Alerter.IRC.helpCommand = help +# Message to send when the bot is addressed and +# doesn't understand the command Alerter.IRC.rejectMessage = sorry, I don't understand your request - -# Logging alerter configuration +# Configuration for the Logging Alerter +# This alerter writes alerts to the i-scream logfile. 
+# The level alerts have to bypass to be logged Alerter.Logging.level = WARNING +# The message to use when logging the alert Alerter.Logging.message = %level%: %attributeName% on %source% has passed %threshold%(%thresholdValue%) threshold with %value% - ############################################################ #### WebFeeder Configuration ############################################################ -# Define these (one or both_ to turn on the WebFeeder's, -# comment it to turn them off +# The WebFeeder drops alerts and latest data into a +# directory for the web interface to use. It is essentially +# the link between the web interface and the server. + +# Set these to something to tell the WebFeeder to drop +# latest data, alerts, or both. Comment them to disable. WebFeeder.latestActive = true WebFeeder.alertActive = true -# Only pass on alerts past this level +# We can make the WebFeeder only drop alerts that go +# past a certain level. The default is "OK", which makes +# all alerts get passed on. Alerter.WebFeeder.level = OK -# Check period in seconds (for cleaning stale/old alerts) +# The WebFeeder cleans out old and stale alerts when it +# performs a check. This setting is the period, in seconds, +# at which a check happens. WebFeeder.checkPeriod = 120 # Delete alerts older than this, in seconds. -# Files are only check every WebFeeder.checkPeriod seconds. WebFeeder.alertDeleteOlderThan = 300 -# Root Path -WebFeeder.rootPath = /usr/local/proj/co600_10/webroot +# Data will be dropped to a subdirectory (specified +# below) of this directory. +WebFeeder.rootPath = /web/i-scream -# Latest Data +# SubDir is the directory below rootPath which latest +# data will be placed in. Below that a directory will +# be created with the machine name, and a file named +# FileName (specified here) will be created containing +# the data. WebFeeder.latestSubDir = latest WebFeeder.latestFileName = latest_data -# Alert Data +# As above, but for the alerts. 
WebFeeder.alertSubDir = alert WebFeeder.alertFileName = alert_data