5 |
|
# configuration system has this file as its |
6 |
|
# root of configuration |
7 |
|
# |
8 |
+ |
# This is a working sample configuration, which |
9 |
+ |
# aims to provide a working "out of the box" |
10 |
+ |
# setup, as well as some example configuration. |
11 |
+ |
# |
12 |
|
# $Author$ |
13 |
|
# $Id$ |
14 |
|
# |
15 |
+ |
# http://www.i-scream.org/ |
16 |
+ |
# |
17 |
+ |
|
18 |
|
############################################################ |
19 |
|
#### Configuration Files & Groups |
20 |
|
############################################################ |
21 |
|
|
22 |
< |
# for computing machines |
23 |
< |
group.computing=Host.129.12.4.*; |
24 |
< |
group.rocks=Host.agate.ukc.ac.uk;Host.arkose.ukc.ac.uk;Host.basalt.ukc.ac.uk;Host.chalk.ukc.ac.uk;\ |
25 |
< |
Host.chert.ukc.ac.uk;Host.granite.ukc.ac.uk;Host.jade.ukc.ac.uk;Host.jasper.ukc.ac.uk;\ |
26 |
< |
Host.magnetite.ukc.ac.uk;Host.obsidian.ukc.ac.uk;Host.pumice.ukc.ac.uk;\ |
20 |
< |
Host.pyrite.ukc.ac.uk;Host.slate.ukc.ac.uk;Host.topaz.ukc.ac.uk; |
21 |
< |
group.compsoc=Host.compsoc1.ukc.ac.uk |
22 |
> |
# All components, such as hosts, will take their configuration |
23 |
> |
# as given in this file. Unless, that is, they have an extra |
24 |
> |
# configuration file specified here, or they're in a group |
25 |
> |
# that has a configuration file. If this is the case the extra |
26 |
> |
# configuration will override what is given in this file. |
27 |
|
|
28 |
< |
config.computing=computing.conf |
29 |
< |
config.rocks=rocks.conf |
30 |
< |
config.compsoc1=rocks.conf |
28 |
> |
# We can define an extra configuration file for any component |
29 |
> |
# of the system. This is done by specifying the following: |
30 |
> |
# |
31 |
> |
# config.component=someconfigfile.conf |
32 |
> |
# |
33 |
> |
# Component names will be defined in the documentation for |
34 |
> |
# the component. An example is that a host component is |
35 |
> |
# known by the name "Host.hostname" or "Host.ip". |
36 |
> |
# It is also possible to use the wildcard "*" in a |
37 |
> |
# hostname or IP address. |
38 |
|
|
39 |
< |
# for library machines |
40 |
< |
group.library=Host.129.12.58.*; |
41 |
< |
config.library=library.conf |
39 |
> |
# This defines that server.example.com has some extra |
40 |
> |
# configuration in the file 'exserver.conf'. |
41 |
> |
config.Host.server.example.com=exserver.conf |
42 |
|
|
43 |
< |
# for SSB machines |
44 |
< |
group.ssb=Host.stue*.ukc.ac.uk; |
45 |
< |
config.ssb=ssb.conf |
43 |
> |
# All webservers are placed in a group named 'www'. |
44 |
> |
group.www=Host.www1.example.com;Host.www2.example.com |
45 |
> |
# Then we define some extra config for them. |
46 |
> |
config.www=webservers.conf |
47 |
|
|
48 |
< |
# mySQL configuration |
49 |
< |
config.mySQL=mySQL.conf |
48 |
> |
# Our workstations are in their own subnet. |
49 |
> |
group.workstations=Host.192.168.10.*; |
50 |
> |
# They can have their own config too. |
51 |
> |
config.workstations=workstations.conf |
52 |
|
|
53 |
+ |
# Any hosts not specified by now have the default |
54 |
+ |
# config as given in this file. |
55 |
+ |
|
56 |
+ |
|
57 |
|
############################################################ |
58 |
|
#### Misc system-wide configuration |
59 |
|
############################################################ |
60 |
|
|
61 |
< |
# the time (in seconds) that checks should be made for |
62 |
< |
# changes in configuration |
61 |
> |
# The ConfigurationProxy caches configuration for various |
62 |
> |
# parts of the server. This setting defines how often, in |
63 |
> |
# seconds, the proxy will check if the data it contains |
64 |
> |
# is still valid. |
65 |
|
ConfigurationProxy.updateTime=60 |
66 |
|
|
67 |
< |
# The time interval at which the Queue's should be monitored |
68 |
< |
Queue.MonitorInterval=15 |
69 |
< |
# The upper limit for a Queue |
67 |
> |
# The internal Queues can be monitored. This specifies |
68 |
> |
# the interval, in seconds, at which their state should |
69 |
> |
# be monitored. |
70 |
> |
Queue.MonitorInterval=300 |
71 |
> |
# The maximum size a Queue can be before items are |
72 |
> |
# dropped when new ones arrive. |
73 |
|
Queue.SizeLimit=1000 |
74 |
< |
# The removal algorithm |
74 |
> |
# And which items we should drop when the queue is full |
75 |
> |
# and new items arrive. Choices are: |
76 |
> |
# RANDOM - drop a random item from the queue |
77 |
> |
# FIRST - drop the first item from the queue |
78 |
> |
# LAST - drop the last item from the queue |
79 |
> |
# DROP - drop the new item from the queue |
80 |
|
Queue.RemoveAlgorithm=FIRST |
81 |
|
|
82 |
+ |
# The XMLCache caches data passing through the system. |
83 |
+ |
# This setting specifies the time, in seconds, at which |
84 |
+ |
# the cache's contents will be rotated. In real terms |
85 |
+ |
# this means the minimum age at which an XMLPacket can |
86 |
+ |
# be before it is up for deletion from the cache. |
87 |
+ |
XMLCache.cleanupPeriod=30 |
88 |
|
|
89 |
+ |
|
90 |
|
############################################################ |
91 |
|
#### FilterManager configuration |
92 |
|
############################################################ |
93 |
|
|
94 |
< |
# The default port to listen for new host connections |
94 |
> |
# The FilterManager accepts connections from hosts |
95 |
> |
# upon startup. It provides them with configuration |
96 |
> |
# and details of which filter to talk to. This setting |
97 |
> |
# specifies which port (TCP) it listens on. |
98 |
|
FilterManager.listenPort=4567 |
99 |
|
|
100 |
+ |
# An Access Control List for the FilterManager. One |
101 |
+ |
# might consider something like this: |
102 |
+ |
# |
103 |
+ |
# FilterManager.ACL=DEFAULT:DENY;*.example.com:ALLOW |
104 |
+ |
FilterManager.ACL=DEFAULT:ALLOW |
105 |
|
|
106 |
|
|
107 |
|
############################################################ |
108 |
|
#### Filter configuration |
109 |
|
############################################################ |
110 |
|
|
111 |
< |
# The default port for Filter's to listen on |
111 |
> |
# A Filter accepts data from hosts on a UDP port. It is |
112 |
> |
# specified here. |
113 |
|
Filter.UDPListenPort=4589 |
69 |
– |
Filter.TCPListenPort=4589 |
114 |
|
|
115 |
< |
# Plugin Information |
116 |
< |
Filter.PluginsPackage=uk.org.iscream.filter.plugins |
117 |
< |
Filter.Plugins=TypeChecker;EnforceEssentialData |
115 |
> |
# Filters contain plugins for checking the data passing |
116 |
> |
# through them. These plugins do the job of "filtering". |
117 |
> |
# This setting specifies the java package they're |
118 |
> |
# contained in. |
119 |
> |
Filter.PluginsPackage=uk.org.iscream.cms.server.filter.plugins |
120 |
> |
# And the names of the plugins to be loaded. |
121 |
> |
Filter.Plugins=TypeChecker;EnforceEssentialData;SourceChecker |
122 |
|
|
123 |
< |
# The name for the root filter |
124 |
< |
RootFilter.name=root |
125 |
< |
|
126 |
< |
# The default parent filter (should almost always be the same as Filter.rootFilter) |
123 |
> |
# Every Filter, except the root, needs a parent to |
124 |
> |
# send their data to. This is usually the root filter, |
125 |
> |
# although in more complex setups it might be another |
126 |
> |
# filter - giving a tree structure. |
127 |
|
Filter.parentFilter=root |
128 |
|
|
129 |
+ |
# A Filter can accept data through UDP, or from |
130 |
+ |
# an upstream Filter (CORBA). These methods can be |
131 |
+ |
# individually turned off on a filter. The default |
132 |
+ |
# is to have them all on. |
133 |
+ |
# (comment or set to 0 to disable) |
134 |
+ |
Filter.ActivateUDPReader=1 |
135 |
+ |
Filter.ActivateCORBAReader=1 |
136 |
|
|
137 |
+ |
# We can also specify Access Control Lists for the |
138 |
+ |
# UDP input of the Filters. One might want something |
139 |
+ |
# restrictive like this: |
140 |
+ |
# |
141 |
+ |
# Filter.UDPACL=DEFAULT:DENY;*.example.com:ALLOW |
142 |
+ |
Filter.UDPACL=DEFAULT:ALLOW |
143 |
|
|
144 |
+ |
# A Filter plugin called the SourceChecker verifies |
145 |
+ |
# that packets come from permitted hosts. It is |
146 |
+ |
# given an ACL to check the hosts against. A possible |
147 |
+ |
# ACL might be: |
148 |
+ |
# |
149 |
+ |
# Filter.SourceCheckerPluginACL=DEFAULT:DENY;*.example.com:ALLOW |
150 |
+ |
Filter.SourceCheckerPluginACL=DEFAULT:ALLOW |
151 |
+ |
|
152 |
+ |
|
153 |
|
############################################################ |
154 |
< |
#### Root filter interfaces configuration |
154 |
> |
#### Root filter configuration |
155 |
|
############################################################ |
156 |
|
|
157 |
< |
# Comment either of these (or delete them) and |
158 |
< |
# that interface won't be started |
159 |
< |
RootFilter.realtimeInterfaceName=realtimeclients |
90 |
< |
RootFilter.dbInterfaceName=database |
157 |
> |
# The special filter known as the "Root Filter" needs |
158 |
> |
# a name within the system. |
159 |
> |
RootFilter.name=root |
160 |
|
|
161 |
|
|
93 |
– |
|
162 |
|
############################################################ |
163 |
|
#### Client interface configuration |
164 |
|
############################################################ |
165 |
|
|
166 |
< |
# The port for the client interface to listen on |
166 |
> |
# The client interface listens for connections from clients |
167 |
> |
# on a TCP port. It is specified here. |
168 |
|
ClientInterface.listenPort=4510 |
169 |
|
|
170 |
+ |
# The client interface needs to connect to another one to |
171 |
+ |
# receive data. There is a special client interface in the |
172 |
+ |
# root filter which is the top of the tree. |
173 |
+ |
ClientInterface.parent=root |
174 |
|
|
175 |
+ |
# The Client Interface has two channels, control and data. |
176 |
+ |
# Each of these can have an ACL for allowing connections. |
177 |
+ |
# As per previous examples in this configuration file they |
178 |
+ |
# could be set restrictively, but we'll leave them open |
179 |
+ |
# by default |
180 |
+ |
ClientInterface.TCPControlChannelACL=DEFAULT:ALLOW |
181 |
+ |
ClientInterface.TCPDataChannelACL=DEFAULT:ALLOW |
182 |
|
|
183 |
+ |
|
184 |
|
############################################################ |
185 |
+ |
#### LocalClient Configuration |
186 |
+ |
############################################################ |
187 |
+ |
|
188 |
+ |
# The parent for the Local Client |
189 |
+ |
Client.parent=clientinterface1 |
190 |
+ |
|
191 |
+ |
|
192 |
+ |
############################################################ |
193 |
|
#### Host Configuration |
194 |
|
############################################################ |
195 |
|
|
196 |
< |
# The default filter for a new host |
197 |
< |
Host.filter=computingFilter; |
196 |
> |
# A host can use any filter. We'll set the default filter |
197 |
> |
# for all hosts to be 'filter1'. This can be a list. |
198 |
> |
Host.filter=filter1; |
199 |
|
|
200 |
< |
# The default update times for a new host |
201 |
< |
Host.UDPUpdateTime=10 |
112 |
< |
Host.TCPUpdateTime=60 |
113 |
< |
Host.AveragerUpdateTime=5 |
200 |
> |
# Hosts send data every UDPUpdateTime, in seconds. |
201 |
> |
Host.UDPUpdateTime=60 |
202 |
|
|
203 |
< |
# The services checks to run on a host |
204 |
< |
Host.serviceChecksPackage=uk.org.iscream.filter.plugins |
203 |
> |
# How long a host's config is valid for (a TTL effectively) |
204 |
> |
# after this time, in seconds, has passed a host should |
205 |
> |
# consider its configuration invalid and check for a new |
206 |
> |
# configuration as soon as it can. |
207 |
> |
Host.ConfigTTL=3600 |
208 |
|
|
209 |
+ |
# We can run service checks on a host. The java package |
210 |
+ |
# which contains them is specified here. |
211 |
+ |
Host.serviceChecksPackage=uk.org.iscream.cms.server.filter.plugins |
212 |
|
|
213 |
|
|
214 |
|
############################################################ |
215 |
|
#### Monitor Configuration |
216 |
|
############################################################ |
217 |
|
|
218 |
< |
# the monitors to run |
219 |
< |
Monitor.PluginsPackage=uk.org.iscream.client.monitors |
220 |
< |
Monitor.Plugins=CPU;Disk;Memory;Swap;Services;Heartbeat;WebFeeder; |
221 |
< |
#Monitor.Plugins=CPU;WebFeeder; |
222 |
< |
#Monitor.Plugins= |
218 |
> |
# Monitors allow data to be watched for possible problems. |
219 |
> |
# This setting specifies which java package they're in. |
220 |
> |
Monitor.PluginsPackage=uk.org.iscream.cms.server.client.monitors |
221 |
> |
# And which ones should be loaded. |
222 |
> |
Monitor.Plugins=CPU;Load;Process;Disk;Memory;Swap;Services;\ |
223 |
> |
Heartbeat;Queue;UserCount;WebFeeder;DiskIO;\ |
224 |
> |
NetIO;Paging; |
225 |
|
|
226 |
+ |
# Alerts timeout after a given time in seconds and go up |
227 |
+ |
# to the next level. These can be overridden for monitors |
228 |
+ |
# on an individual basis. |
229 |
+ |
Monitor.alertTimeout.NOTICE=60 |
230 |
+ |
Monitor.alertTimeout.WARNING=900 |
231 |
+ |
Monitor.alertTimeout.CAUTION=1800 |
232 |
+ |
Monitor.alertTimeout.CRITICAL=3600 |
233 |
|
|
234 |
< |
## CPU monitor configuration |
235 |
< |
# threshold values |
236 |
< |
# |
237 |
< |
# note: if the UPPER threshold is broken, |
238 |
< |
# the alertTimeout values are halved. |
239 |
< |
# This is to escalate the alert quicker. |
234 |
> |
# Is monitoring enabled? (comment or set to 0 to disable) |
235 |
> |
Monitor.enable=1 |
236 |
> |
|
237 |
> |
## Threshold values |
238 |
> |
# |
239 |
> |
# Here we specify the threshold for each monitor. When |
240 |
> |
# a threshold is broken an alert is raised. If the |
241 |
> |
# UPPER threshold is broken the alertTimeout values |
242 |
> |
# given above are halved to make the alert escalate |
243 |
> |
# quicker. |
244 |
> |
|
245 |
> |
# CPU monitor threshold values |
246 |
|
Monitor.CPU.threshold.LOWER=90 |
247 |
< |
Monitor.CPU.threshold.UPPER=99 |
247 |
> |
Monitor.CPU.threshold.UPPER=95 |
248 |
> |
# enable this monitor (comment or set to 0 to disable) |
249 |
> |
Monitor.CPU.enable=1 |
250 |
|
|
251 |
< |
# timings of the alerts (in seconds) |
252 |
< |
Monitor.CPU.alertTimeout.NOTICE=60 |
253 |
< |
Monitor.CPU.alertTimeout.WARNING=900 |
254 |
< |
Monitor.CPU.alertTimeout.CAUTION=1800 |
255 |
< |
Monitor.CPU.alertTimeout.CRITICAL=3600 |
251 |
> |
# Load monitor threshold values |
252 |
> |
Monitor.Load.threshold.LOWER=10.0 |
253 |
> |
Monitor.Load.threshold.UPPER=20.0 |
254 |
> |
# enable this monitor (comment or set to 0 to disable) |
255 |
> |
Monitor.Load.enable=1 |
256 |
|
|
257 |
+ |
# Paging monitor threshold values |
258 |
+ |
Monitor.Paging.threshold.LOWER=1000 |
259 |
+ |
Monitor.Paging.threshold.UPPER=5000 |
260 |
+ |
# enable this monitor (comment or set to 0 to disable) |
261 |
+ |
Monitor.Paging.enable=1 |
262 |
|
|
263 |
< |
## Memory monitor configuration |
264 |
< |
# threshold values |
265 |
< |
# |
266 |
< |
# note: if the UPPER threshold is broken, |
267 |
< |
# the alertTimeout values are halved. |
268 |
< |
# This is to escalate the alert quicker. |
263 |
> |
# Process Count threshold values |
264 |
> |
Monitor.Process.threshold.LOWER=500 |
265 |
> |
Monitor.Process.threshold.UPPER=1000 |
266 |
> |
# enable this monitor (comment or set to 0 to disable) |
267 |
> |
Monitor.Process.enable=1 |
268 |
> |
|
269 |
> |
# Memory monitor threshold values |
270 |
|
Monitor.Memory.threshold.LOWER=80 |
271 |
|
Monitor.Memory.threshold.UPPER=90 |
272 |
+ |
# Whether to include 'cache' in the 'free' value |
273 |
+ |
# (1 is true, anything else is false) |
274 |
+ |
Monitor.Memory.useCacheAsFree=1 |
275 |
+ |
# enable this monitor (comment or set to 0 to disable) |
276 |
+ |
Monitor.Memory.enable=1 |
277 |
|
|
278 |
< |
# timings of the alerts (in seconds) |
157 |
< |
Monitor.Memory.alertTimeout.NOTICE=60 |
158 |
< |
Monitor.Memory.alertTimeout.WARNING=900 |
159 |
< |
Monitor.Memory.alertTimeout.CAUTION=1800 |
160 |
< |
Monitor.Memory.alertTimeout.CRITICAL=3600 |
161 |
< |
|
162 |
< |
|
163 |
< |
## Swap monitor configuration |
164 |
< |
# threshold values |
165 |
< |
# |
166 |
< |
# note: if the UPPER threshold is broken, |
167 |
< |
# the alertTimeout values are halved. |
168 |
< |
# This is to escalate the alert quicker. |
278 |
> |
# Swap monitor threshold values |
279 |
|
Monitor.Swap.threshold.LOWER=80 |
280 |
|
Monitor.Swap.threshold.UPPER=90 |
281 |
+ |
# enable this monitor (comment or set to 0 to disable) |
282 |
+ |
Monitor.Swap.enable=1 |
283 |
|
|
284 |
< |
# timings of the alerts (in seconds) |
285 |
< |
Monitor.Swap.alertTimeout.NOTICE=60 |
286 |
< |
Monitor.Swap.alertTimeout.WARNING=900 |
287 |
< |
Monitor.Swap.alertTimeout.CAUTION=1800 |
288 |
< |
Monitor.Swap.alertTimeout.CRITICAL=3600 |
284 |
> |
# UserCount monitor threshold values |
285 |
> |
Monitor.UserCount.threshold.LOWER=100 |
286 |
> |
Monitor.UserCount.threshold.UPPER=200 |
287 |
> |
# enable this monitor (comment or set to 0 to disable) |
288 |
> |
Monitor.UserCount.enable=1 |
289 |
|
|
290 |
< |
|
179 |
< |
## Disk monitor configuration |
180 |
< |
# threshold values |
181 |
< |
# |
182 |
< |
# note: if the UPPER threshold is broken, |
183 |
< |
# the alertTimeout values are halved. |
184 |
< |
# This is to escalate the alert quicker. |
290 |
> |
# Disk monitor threshold values |
291 |
|
Monitor.Disk.threshold.LOWER=90 |
292 |
|
Monitor.Disk.threshold.UPPER=95 |
293 |
+ |
Monitor.DiskInode.threshold.LOWER=90 |
294 |
+ |
Monitor.DiskInode.threshold.UPPER=95 |
295 |
+ |
# Thresholds given are PERCENTAGE or VALUE |
296 |
+ |
# where PERCENTAGE is a percentage of disk in use |
297 |
+ |
# and VALUE is an absolute value of space free |
298 |
+ |
Monitor.Disk.thresholdMeasure=PERCENTAGE |
299 |
+ |
Monitor.DiskInode.thresholdMeasure=PERCENTAGE |
300 |
+ |
# enable this monitor (comment or set to 0 to disable) |
301 |
+ |
Monitor.Disk.enable=1 |
302 |
+ |
Monitor.DiskInode.enable=1 |
303 |
|
|
304 |
< |
# timings of the alerts (in seconds) |
305 |
< |
Monitor.Disk.alertTimeout.NOTICE=60 |
306 |
< |
Monitor.Disk.alertTimeout.WARNING=900 |
307 |
< |
Monitor.Disk.alertTimeout.CAUTION=1800 |
308 |
< |
Monitor.Disk.alertTimeout.CRITICAL=3600 |
304 |
> |
# DiskIO monitor threshold values (in bytes) |
305 |
> |
Monitor.DiskIO.rbytes.threshold.LOWER=10000000 |
306 |
> |
Monitor.DiskIO.rbytes.threshold.UPPER=50000000 |
307 |
> |
Monitor.DiskIO.wbytes.threshold.LOWER=10000000 |
308 |
> |
Monitor.DiskIO.wbytes.threshold.UPPER=50000000 |
309 |
> |
# enable this monitor (comment or set to 0 to disable) |
310 |
> |
Monitor.DiskIO.enable=1 |
311 |
|
|
312 |
+ |
# NetIO monitor threshold values (in bytes) |
313 |
+ |
Monitor.NetIO.rxbytes.threshold.LOWER=10000000 |
314 |
+ |
Monitor.NetIO.rxbytes.threshold.UPPER=50000000 |
315 |
+ |
Monitor.NetIO.txbytes.threshold.LOWER=10000000 |
316 |
+ |
Monitor.NetIO.txbytes.threshold.UPPER=50000000 |
317 |
+ |
# enable this monitor (comment or set to 0 to disable) |
318 |
+ |
Monitor.NetIO.enable=1 |
319 |
|
|
320 |
< |
## Services monitor configuration |
321 |
< |
# threshold values |
320 |
> |
# Queue monitor threshold values |
321 |
> |
Monitor.Queue.threshold.LOWER=50 |
322 |
> |
Monitor.Queue.threshold.UPPER=80 |
323 |
> |
# Thresholds given are PERCENTAGE or VALUE |
324 |
> |
# where PERCENTAGE is a percentage of the queue in use |
325 |
> |
# and VALUE is an absolute value of free space |
326 |
> |
Monitor.Queue.thresholdMeasure=PERCENTAGE |
327 |
> |
# enable this monitor (comment or set to 0 to disable) |
328 |
> |
Monitor.Queue.enable=1 |
329 |
> |
|
330 |
> |
# Services monitor threshold values |
331 |
|
# |
332 |
|
# THE SERVICES MONITOR USES A HACK |
333 |
|
# WHEREBY IT USES THE SERVICE STATUS |
334 |
< |
# AS THE THRESHOLD LEVEL. THIS SHOULD |
334 |
> |
# AS THE THRESHOLD LEVEL. THIS SHOULD |
335 |
|
# BE LOOKED AT! |
336 |
|
# |
337 |
< |
# note: if the UPPER threshold is broken, |
338 |
< |
# the alertTimeout values are halved. |
205 |
< |
# This is to escalate the alert quicker. |
206 |
< |
#Monitor.Services.threshold.LOWER=120 |
207 |
< |
#Monitor.Services.threshold.LOWER=300 |
208 |
< |
|
209 |
< |
# timings of the alerts (in seconds) |
210 |
< |
Monitor.Services.alertTimeout.NOTICE=60 |
211 |
< |
Monitor.Services.alertTimeout.WARNING=900 |
212 |
< |
Monitor.Services.alertTimeout.CAUTION=1800 |
213 |
< |
Monitor.Services.alertTimeout.CRITICAL=3600 |
214 |
< |
|
215 |
< |
# Make sure we shut up eventually |
337 |
> |
# how many times we should be at the highest level before |
338 |
> |
# we go to a FINAL alert |
339 |
|
Monitor.Services.reachFINALcount=5 |
340 |
|
|
341 |
< |
|
219 |
< |
## Heartbeat monitor configuration |
220 |
< |
# threshold values |
221 |
< |
# |
222 |
< |
# note: if the UPPER threshold is broken, |
223 |
< |
# the alertTimeout values are halved. |
224 |
< |
# This is to escalate the alert quicker. |
225 |
< |
Monitor.Heartbeat.checkPeriod=50 |
341 |
> |
# Heartbeat threshold values |
342 |
|
# these are values in seconds from when the heartbeat |
343 |
|
# was *expected* to arrive (ie. last + heartbeat period) |
344 |
< |
Monitor.Heartbeat.threshold.LOWER=120 |
344 |
> |
Monitor.Heartbeat.threshold.LOWER=180 |
345 |
|
Monitor.Heartbeat.threshold.UPPER=300 |
346 |
< |
|
347 |
< |
# timings of the alerts (in seconds) |
348 |
< |
Monitor.Heartbeat.alertTimeout.NOTICE=60 |
349 |
< |
Monitor.Heartbeat.alertTimeout.WARNING=900 |
234 |
< |
Monitor.Heartbeat.alertTimeout.CAUTION=1800 |
235 |
< |
Monitor.Heartbeat.alertTimeout.CRITICAL=3600 |
236 |
< |
|
237 |
< |
# this means it will reach a FINAL level, |
238 |
< |
# and thus the host will be removed from the monitor list, |
239 |
< |
# if the highest level alert is reached and occours this |
240 |
< |
# number of times. |
241 |
< |
# |
242 |
< |
# int this setup, that means 5 CRITICAL alerts would occour |
243 |
< |
# on a Heartbeat before reaching FINAL. |
244 |
< |
# If this value is wrong or not present, FINAL is never reached |
245 |
< |
# |
346 |
> |
# this is how often we will run a "check" of heartbeats |
347 |
> |
Monitor.Heartbeat.checkPeriod=50 |
348 |
> |
# how many times we should be at the highest level before |
349 |
> |
# we go to a FINAL alert |
350 |
|
Monitor.Heartbeat.reachFINALcount=5 |
351 |
+ |
# list of hosts we expect to have at startup |
352 |
+ |
# this ensures we notice hosts that aren't running, not |
353 |
+ |
# just those that have gone down recently. |
354 |
+ |
#Monitor.Heartbeat.initialHosts=importantserver.example.com;myserver.example.com |
355 |
+ |
# enable this monitor (comment or set to 0 to disable) |
356 |
+ |
Monitor.Heartbeat.enable=1 |
357 |
|
|
358 |
+ |
|
359 |
|
############################################################ |
360 |
|
#### Alerter Configuration |
361 |
|
############################################################ |
362 |
< |
# The following var's are understood by ALL alerters for messages |
362 |
> |
# The following variables are understood by ALL alerters |
363 |
> |
# for messages. They will be substituted for their |
364 |
> |
# appropriate value. |
365 |
|
# |
366 |
|
# %level% - the alert level (eg, WARNING) |
367 |
|
# %threshold% - the threshold broken (eg, LOWER) |
374 |
|
# %timeOfFirstAlert% - the time the first alert was sent |
375 |
|
# |
376 |
|
|
377 |
< |
# alerters to run |
378 |
< |
Alerter.PluginsPackage=uk.org.iscream.client.alerters |
379 |
< |
#Alerter.Plugins=EMail;IRC;WebFeeder; |
380 |
< |
Alerter.Plugins=IRC;WebFeeder; |
381 |
< |
#Alerter.Plugins=WebFeeder; |
382 |
< |
#Alerter.Plugins= |
377 |
> |
# The java package containing the alerters |
378 |
> |
Alerter.PluginsPackage=uk.org.iscream.cms.server.client.alerters |
379 |
> |
# And the names of the ones to activate |
380 |
> |
# You might want to turn them all on, but we only activate |
381 |
> |
# two basic ones by default. |
382 |
> |
# Alerter.Plugins=EMail;IRC;WebFeeder;Logging; |
383 |
> |
Alerter.Plugins=WebFeeder;Logging; |
384 |
|
|
385 |
< |
# EMail alerter configuration |
385 |
> |
# Configuration for the EMail alerter |
386 |
> |
# The level alerts have to bypass to be e-mailed out |
387 |
|
Alerter.EMail.level = WARNING |
388 |
< |
Alerter.EMail.destList = dev@i-scream.org.uk |
389 |
< |
Alerter.EMail.sender = dev@i-scream.org.uk |
390 |
< |
Alerter.EMail.smtpServer = mercury.ukc.ac.uk |
388 |
> |
# A list of people to send alerts to |
389 |
> |
Alerter.EMail.destList = me@example.com |
390 |
> |
# The sender of the e-mail |
391 |
> |
Alerter.EMail.sender = me@example.com |
392 |
> |
# The SMTP server to send alerts to |
393 |
> |
Alerter.EMail.smtpServer = smtp.example.com |
394 |
> |
# The subject line of the e-mail |
395 |
|
Alerter.EMail.subject = i-scream alert: %level% alert on %source% for %attributeName% |
396 |
+ |
# The message in the e-mail |
397 |
|
Alerter.EMail.message = The i-scream distributed central monitoring system has\nraised a %level% alert for the host %source%.\n\nThe value for %attributeName% of %value% has exceeded the\n%threshold% threshold value of %thresholdValue%.\n\nThis alert was originally raised at %timeOfFirstAlert%,\nwhich was %timeSinceFirstAlert% ago.\n\nThe next alert (should one occur) will be sent in %timeTillNextAlert%. |
398 |
|
|
399 |
< |
|
400 |
< |
# IRC alerter configuration |
399 |
> |
# Configuration for the IRC alerter |
400 |
> |
# The level alerts have to bypass to be sent |
401 |
|
Alerter.IRC.level = OK |
402 |
< |
Alerter.IRC.IRCServer = killigrew.ukc.ac.uk |
402 |
> |
# The IRC server name |
403 |
> |
Alerter.IRC.IRCServer = irc.example.com |
404 |
> |
# and its port |
405 |
|
Alerter.IRC.IRCPort = 6667 |
406 |
+ |
# A list of nicks to try and be |
407 |
|
Alerter.IRC.nickList = iscreamBot;_iscreamBot;i-screamBot |
408 |
+ |
# Username to be |
409 |
|
Alerter.IRC.user = i-scream |
410 |
< |
Alerter.IRC.comment = i-scream alerting bot |
410 |
> |
# Responses to CTCP prods |
411 |
> |
Alerter.IRC.comment = i-scream alerting bot (based on PircBot) |
412 |
> |
Alerter.IRC.finger = i-scream alerting bot (based on PircBot) |
413 |
> |
# Channel to join |
414 |
|
Alerter.IRC.channel = #i-scream |
415 |
+ |
# Message to use when sending an alert |
416 |
|
Alerter.IRC.message = %level%: %attributeName% on %source% has passed %threshold%(%thresholdValue%) threshold with %value% - time till next alert (should one occur), %timeTillNextAlert% |
417 |
+ |
# How long to wait before reconnecting |
418 |
|
Alerter.IRC.reconnectDelay = 30 |
419 |
+ |
# Notice to send when starting up |
420 |
|
Alerter.IRC.startupNotice = i-scream alerting bot activated |
421 |
+ |
# Commands the bot will recognise |
422 |
|
Alerter.IRC.stopCommand = stop alerts |
423 |
|
Alerter.IRC.startCommand = start alerts |
424 |
|
Alerter.IRC.lastAlertCommand = last alert |
429 |
|
Alerter.IRC.timeSinceLastAlertCommand = time since last alert |
430 |
|
Alerter.IRC.versionCommand = version |
431 |
|
Alerter.IRC.helpCommand = help |
432 |
+ |
# Message to send when the bot is addressed and |
433 |
+ |
# doesn't understand the command |
434 |
|
Alerter.IRC.rejectMessage = sorry, I don't understand your request |
435 |
|
|
436 |
+ |
# Configuration for the Logging Alerter |
437 |
+ |
# This alerter writes alerts to the i-scream logfile. |
438 |
+ |
# The level alerts have to bypass to be logged |
439 |
+ |
Alerter.Logging.level = WARNING |
440 |
+ |
# The message to use when logging the alert |
441 |
+ |
Alerter.Logging.message = %level%: %attributeName% on %source% has passed %threshold%(%thresholdValue%) threshold with %value% |
442 |
+ |
|
443 |
+ |
|
444 |
|
############################################################ |
445 |
|
#### WebFeeder Configuration |
446 |
|
############################################################ |
447 |
|
|
448 |
< |
# Define these (one or both_ to turn on the WebFeeder's, |
449 |
< |
# comment it to turn them off |
448 |
> |
# The WebFeeder drops alerts and latest data in to a |
449 |
> |
# directory for the web interface to use. It is essentially |
450 |
> |
# the link between the web interface and the server. |
451 |
> |
|
452 |
> |
# Set these to something to tell the WebFeeder to drop |
453 |
> |
# latest data, alerts, or both. Comment them to disable. |
454 |
|
WebFeeder.latestActive = true |
455 |
|
WebFeeder.alertActive = true |
456 |
|
|
457 |
< |
# Only pass on alerts past this level |
458 |
< |
WebFeeder.alertLevel = OK |
457 |
> |
# We can make the WebFeeder only drop alerts that go |
458 |
> |
# past a certain level. The default is "OK", which makes |
459 |
> |
# all alerts get passed on. |
460 |
> |
Alerter.WebFeeder.level = OK |
461 |
|
|
462 |
< |
# Check period in seconds (for cleaning stale/old alerts) |
462 |
> |
# The WebFeeder cleans out old and stale alerts when it |
463 |
> |
# performs a check. This setting is the period, in seconds, |
464 |
> |
# at which a check happens. |
465 |
|
WebFeeder.checkPeriod = 120 |
466 |
|
|
467 |
|
# Delete alerts older than this, in seconds. |
319 |
– |
# Files are only check every WebFeeder.checkPeriod seconds. |
468 |
|
WebFeeder.alertDeleteOlderThan = 300 |
469 |
|
|
470 |
< |
# Root Path |
471 |
< |
WebFeeder.rootPath = /usr/local/proj/co600_10/webroot |
470 |
> |
# Data will be dropped to a subdirectory (specified |
471 |
> |
# below) of this directory. |
472 |
> |
WebFeeder.rootPath = /web/i-scream |
473 |
|
|
474 |
< |
# Latest Data |
474 |
> |
# SubDir is the directory below rootPath which latest |
475 |
> |
# data will be placed in. Below that a directory will |
476 |
> |
# be created with the machine name, and a file named |
477 |
> |
# FileName (specified here) will be created containing |
478 |
> |
# the data. |
479 |
|
WebFeeder.latestSubDir = latest |
480 |
|
WebFeeder.latestFileName = latest_data |
481 |
|
|
482 |
< |
# Alert Data |
482 |
> |
# As above, but for the alerts. |
483 |
|
WebFeeder.alertSubDir = alert |
484 |
|
WebFeeder.alertFileName = alert_data |
485 |
|
|
333 |
– |
############################################################ |
486 |
|
|
487 |
+ |
############################################################ |