ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
Revision: 1.22
Committed: Sat May 18 18:16:00 2002 UTC (22 years ago) by tdb
Branch: MAIN
Changes since 1.21: +22 -3 lines
Log Message:
i-scream is now licensed under the GPL. I've added the GPL headers to every
source file, and put a full copy of the license in the appropriate places.
I think I've covered everything. This is going to be a mad commit ;)

File Contents

# Content
1 /*
2 * i-scream central monitoring system
3 * Copyright (C) 2000-2002 i-scream
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 */
19
20 //---PACKAGE DECLARATION---
21 package uk.org.iscream.cms.server.client.monitors;
22
23 //---IMPORTS---
24 import java.util.HashMap;
25 import java.util.Iterator;
26 import java.util.StringTokenizer;
27 import uk.org.iscream.cms.server.client.*;
28 import uk.org.iscream.cms.server.core.*;
29 import uk.org.iscream.cms.server.util.*;
30 import uk.org.iscream.cms.server.componentmanager.*;
31
32 /**
33 * This Monitor watches heartbeats.
34 * It generates an alert when a heartbeat that is expected
35 * does not arrive. Unlike all the other monitors, this one
36 * is driven by an event *not* occuring, rather than an
37 * event occuring. This means it must be actively checking
38 * for missing heartbeat's, and thus has an extra inner class
39 * thread.
40 *
41 * @author $Author: tdb $
42 * @version $Id: Heartbeat__Monitor.java,v 1.21 2001/11/26 12:56:33 tdb Exp $
43 */
44 public class Heartbeat__Monitor extends MonitorSkeleton {
45
46 //---FINAL ATTRIBUTES---
47
48 /**
49 * The current CVS revision of this class
50 */
51 public final String REVISION = "$Revision: 1.21 $";
52
53 /**
54 * A description of this monitor
55 */
56 public final String DESC = "Monitors Heartbeats.";
57
58 /**
59 * The default (used if not configured) period at
60 * which to check for old heartbeats. (in seconds)
61 */
62 public final int DEFAULT_CHECK_PERIOD = 60;
63
64 //---STATIC METHODS---
65
66 //---CONSTRUCTORS---
67
68 /**
69 * Constructs a new Heartbeat monitor, and starts off
70 * the worker thread.
71 */
72 public Heartbeat__Monitor() {
73 super();
74 createInitialHosts();
75 new HeartbeatWorker().start();
76 }
77
78 //---PUBLIC METHODS---
79
80 /**
81 * Analyse a packet of data. In this case, this will just
82 * register the fact that a heartbeat has arrived.
83 *
84 * @param packet The packet of data to analyse
85 */
86 public void analysePacket(XMLPacket packet) {
87 String source = packet.getParam("packet.attributes.machine_name");
88 if (!_hosts.containsKey(source)) {
89 synchronized(this) {
90 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
91 }
92 }
93 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
94 lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
95 }
96
97 /**
98 * Overrides the {@link java.lang.Object#toString() Object.toString()}
99 * method to provide clean logging (every class should have this).
100 *
101 * This uses the uk.org.iscream.cms.server.util.NameFormat class
102 * to format the toString()
103 *
104 * @return the name of this class and its CVS revision
105 */
106 public String toString() {
107 return FormatName.getName(
108 _name,
109 getClass().getName(),
110 REVISION);
111 }
112
113 /**
114 * return the String representation of what the monitor does
115 */
116 public String getDescription(){
117 return DESC;
118 }
119
120 //---PRIVATE METHODS---
121
122 /**
123 * Checks whether the time since the last heartbeat
124 * is beyond the threshold(s).
125 *
126 * @param timeSinceLastHB a long time since the last heartbeat arrived
127 * @param reg the Register for this host
128 * @return the level which has been breached, if any
129 */
130 private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
131 for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
132 if (reg.getThreshold(thresholdLevel) != -1.0) {
133 if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
134 return thresholdLevel;
135 }
136 }
137 }
138 return Alert.thresholdNORMAL;
139 }
140
141 /**
142 * Gets an initial list of hosts from the config
143 * and adds a fake set of heartbeats for them.
144 * If the hosts don't respond within the timeout
145 * period an alert will be raised.
146 *
147 * The effect of this is to allow us to know about
148 * hosts which weren't on when we started up, and
149 * will thus never have generated a heartbeat - yet
150 * will still want to know they're not responding.
151 */
152 private void createInitialHosts() {
153 // get the initial list of hosts from the config
154 String initialHosts = "";
155 try {
156 initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
157 } catch (PropertyNotFoundException e) {
158 // just leave initialHosts empty
159 _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
160 }
161
162 // parse through the initial hosts adding them
163 StringTokenizer st = new StringTokenizer(initialHosts, ";");
164 while (st.hasMoreTokens()) {
165 String source = st.nextToken();
166 // check if they already exist, don't want to add them twice
167 if (!_hosts.containsKey(source)) {
168 synchronized(this) {
169 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
170 }
171 }
172 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
173 // set a "fake" heartbeat
174 lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
175 }
176 }
177
178 //---ACCESSOR/MUTATOR METHODS---
179
180 /**
181 * Returns a reference to the Queue we're getting data
182 * from. This is specific to this monitor.
183 *
184 * @return a reference to a Queue to get data from
185 */
186 protected Queue getQueue() {
187 return MonitorManager.getInstance().getHeartbeatQueue();
188 }
189
190 //---ATTRIBUTES---
191
192 /**
193 * This is the friendly identifier of the
194 * component this class is running in.
195 * eg, a Filter may be called "filter1",
196 * If this class does not have an owning
197 * component, a name from the configuration
198 * can be placed here. This name could also
199 * be changed to null for utility classes.
200 */
201 private String _name = "Heartbeat";
202
203 /**
204 * A reference to the configuration proxy in use
205 */
206 private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
207
208 /**
209 * A HashMap of hosts, with associated HeartbeatHolder's.
210 */
211 private HashMap _hosts = new HashMap();
212
213 /**
214 * A reference to the system logger.
215 */
216 private Logger _logger = ReferenceManager.getInstance().getLogger();
217
218 //---STATIC ATTRIBUTES---
219
220 //---INNER CLASSES---
221
222 /**
223 * This inner class simply holding some information
224 * about a specific host.
225 */
226 private class HeartbeatHolder {
227
228 /**
229 * Construct a new HeartbeatHolder.
230 */
231 public HeartbeatHolder(Register register) {
232 _register = register;
233 }
234
235 /**
236 * Set the time of the last heartbeat
237 */
238 public void setLastHeartbeat(long lastHeartbeat) {
239 _lastHeartbeat = lastHeartbeat;
240 }
241
242 /**
243 * Get the time of the last heartbeat
244 */
245 public long getLastHeartbeat() {
246 return _lastHeartbeat;
247 }
248
249 /**
250 * Get the Register
251 */
252 public Register getRegister() {
253 return _register;
254 }
255
256 /**
257 * last heartbeat time
258 */
259 private long _lastHeartbeat;
260
261 /**
262 * register ref
263 */
264 private Register _register;
265 }
266
267 /**
268 * This worker thread just checks all the hosts and then
269 * waits a period of time before doing it again. It sends
270 * Alerts as required.
271 */
272 private class HeartbeatWorker extends Thread {
273
274 /**
275 * The main run method of this worker thread. It simply
276 * checks through all the hosts it has stored, running
277 * the analyseHB method on each. It then removes any
278 * that have passed a FINAL, and waits a (configured)
279 * length of time before doing it again.
280 */
281 public void run() {
282 ConfigurationProxy cp = ConfigurationProxy.getInstance();
283 while(true) {
284 // this cycle period of this monitor's checks
285 int checkPeriod = 0;
286 try {
287 checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
288 } catch (PropertyNotFoundException e) {
289 checkPeriod = DEFAULT_CHECK_PERIOD;
290 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
291 } catch (NumberFormatException e) {
292 checkPeriod = DEFAULT_CHECK_PERIOD;
293 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
294 }
295
296 synchronized(Heartbeat__Monitor.this) {
297 // perform the checks (use HB hash, although they *should* be the same)
298 Iterator i = _hosts.keySet().iterator();
299 while(i.hasNext()) {
300 // get host
301 String source = (String) i.next();
302 // check it
303 boolean remove = analyseHB(source);
304 // remove it if it's passed a FINAL
305 if(remove) {
306 i.remove();
307 }
308 }
309 }
310
311 // wait a while
312 try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
313 }
314 }
315
316 /**
317 * Analyses a given host's state, and if need be generates
318 * a relevant Alert. Note that it also checks if the last
319 * alert sent is FINAL, in which case it returns true to
320 * indicate removal of this host.
321 *
322 * @param source the host to check
323 * @return whether this host can be deleted
324 */
325 private boolean analyseHB(String source) {
326 ConfigurationProxy cp = ConfigurationProxy.getInstance();
327 HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
328 Register reg = hbHolder.getRegister();
329
330 // get host's HB interval (seconds)
331 // this should always exist, thus we set to 0
332 int hostHBinterval = 0;
333 try {
334 hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
335 } catch (PropertyNotFoundException e) {
336 hostHBinterval = 0;
337 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
338 } catch (NumberFormatException e) {
339 hostHBinterval = 0;
340 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
341 }
342
343 // get host's last HB time (seconds)
344 long lastHeartbeat = hbHolder.getLastHeartbeat();
345 // time since last heartbeat (seconds)
346 long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
347 // time since (or until if negative) the expected heartbeat
348 long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
349
350 // best do a check in case the expected heartbeat is in the future
351 if(timeSinceExpectedHB < 0) {
352 timeSinceExpectedHB = 0;
353 }
354
355 // find out the threshold level we're at
356 int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
357
358 // process the alert
359 Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
360
361 if(reg.getLastAlertLevel() == Alert.alertFINAL) {
362 return true;
363 }
364 return false;
365 }
366 }
367 }