ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
Revision: 1.23
Committed: Tue May 21 16:47:16 2002 UTC (22 years ago) by tdb
Branch: MAIN
Changes since 1.22: +3 -2 lines
Log Message:
Added URL to GPL headers.

File Contents

# Content
1 /*
2 * i-scream central monitoring system
3 * http://www.i-scream.org.uk
4 * Copyright (C) 2000-2002 i-scream
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 */
20
21 //---PACKAGE DECLARATION---
22 package uk.org.iscream.cms.server.client.monitors;
23
24 //---IMPORTS---
25 import java.util.HashMap;
26 import java.util.Iterator;
27 import java.util.StringTokenizer;
28 import uk.org.iscream.cms.server.client.*;
29 import uk.org.iscream.cms.server.core.*;
30 import uk.org.iscream.cms.server.util.*;
31 import uk.org.iscream.cms.server.componentmanager.*;
32
33 /**
34 * This Monitor watches heartbeats.
35 * It generates an alert when a heartbeat that is expected
36 * does not arrive. Unlike all the other monitors, this one
37 * is driven by an event *not* occurring, rather than an
38 * event occurring. This means it must be actively checking
39 * for missing heartbeats, and thus has an extra inner class
40 * thread.
41 *
42 * @author $Author: tdb $
43 * @version $Id: Heartbeat__Monitor.java,v 1.22 2002/05/18 18:16:00 tdb Exp $
44 */
45 public class Heartbeat__Monitor extends MonitorSkeleton {
46
47 //---FINAL ATTRIBUTES---
48
49 /**
50 * The current CVS revision of this class
51 */
52 public final String REVISION = "$Revision: 1.22 $";
53
54 /**
55 * A description of this monitor
56 */
57 public final String DESC = "Monitors Heartbeats.";
58
59 /**
60 * The default (used if not configured) period at
61 * which to check for old heartbeats. (in seconds)
62 */
63 public final int DEFAULT_CHECK_PERIOD = 60;
64
65 //---STATIC METHODS---
66
67 //---CONSTRUCTORS---
68
69 /**
70 * Constructs a new Heartbeat monitor, and starts off
71 * the worker thread.
72 */
73 public Heartbeat__Monitor() {
74 super();
75 createInitialHosts();
76 new HeartbeatWorker().start();
77 }
78
79 //---PUBLIC METHODS---
80
81 /**
82 * Analyse a packet of data. In this case, this will just
83 * register the fact that a heartbeat has arrived.
84 *
85 * @param packet The packet of data to analyse
86 */
87 public void analysePacket(XMLPacket packet) {
88 String source = packet.getParam("packet.attributes.machine_name");
89 if (!_hosts.containsKey(source)) {
90 synchronized(this) {
91 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
92 }
93 }
94 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
95 lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
96 }
97
98 /**
99 * Overrides the {@link java.lang.Object#toString() Object.toString()}
100 * method to provide clean logging (every class should have this).
101 *
102 * This uses the uk.org.iscream.cms.server.util.NameFormat class
103 * to format the toString()
104 *
105 * @return the name of this class and its CVS revision
106 */
107 public String toString() {
108 return FormatName.getName(
109 _name,
110 getClass().getName(),
111 REVISION);
112 }
113
114 /**
115 * return the String representation of what the monitor does
116 */
117 public String getDescription(){
118 return DESC;
119 }
120
121 //---PRIVATE METHODS---
122
123 /**
124 * Checks whether the time since the last heartbeat
125 * is beyond the threshold(s).
126 *
127 * @param timeSinceLastHB a long time since the last heartbeat arrived
128 * @param reg the Register for this host
129 * @return the level which has been breached, if any
130 */
131 private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
132 for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
133 if (reg.getThreshold(thresholdLevel) != -1.0) {
134 if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
135 return thresholdLevel;
136 }
137 }
138 }
139 return Alert.thresholdNORMAL;
140 }
141
142 /**
143 * Gets an initial list of hosts from the config
144 * and adds a fake set of heartbeats for them.
145 * If the hosts don't respond within the timeout
146 * period an alert will be raised.
147 *
148 * The effect of this is to allow us to know about
149 * hosts which weren't on when we started up, and
150 * will thus never have generated a heartbeat - yet
151 * will still want to know they're not responding.
152 */
153 private void createInitialHosts() {
154 // get the initial list of hosts from the config
155 String initialHosts = "";
156 try {
157 initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
158 } catch (PropertyNotFoundException e) {
159 // just leave initialHosts empty
160 _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
161 }
162
163 // parse through the initial hosts adding them
164 StringTokenizer st = new StringTokenizer(initialHosts, ";");
165 while (st.hasMoreTokens()) {
166 String source = st.nextToken();
167 // check if they already exist, don't want to add them twice
168 if (!_hosts.containsKey(source)) {
169 synchronized(this) {
170 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
171 }
172 }
173 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
174 // set a "fake" heartbeat
175 lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
176 }
177 }
178
179 //---ACCESSOR/MUTATOR METHODS---
180
181 /**
182 * Returns a reference to the Queue we're getting data
183 * from. This is specific to this monitor.
184 *
185 * @return a reference to a Queue to get data from
186 */
187 protected Queue getQueue() {
188 return MonitorManager.getInstance().getHeartbeatQueue();
189 }
190
191 //---ATTRIBUTES---
192
193 /**
194 * This is the friendly identifier of the
195 * component this class is running in.
196 * eg, a Filter may be called "filter1",
197 * If this class does not have an owning
198 * component, a name from the configuration
199 * can be placed here. This name could also
200 * be changed to null for utility classes.
201 */
202 private String _name = "Heartbeat";
203
204 /**
205 * A reference to the configuration proxy in use
206 */
207 private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
208
209 /**
210 * A HashMap of hosts, with associated HeartbeatHolder's.
211 */
212 private HashMap _hosts = new HashMap();
213
214 /**
215 * A reference to the system logger.
216 */
217 private Logger _logger = ReferenceManager.getInstance().getLogger();
218
219 //---STATIC ATTRIBUTES---
220
221 //---INNER CLASSES---
222
223 /**
224 * This inner class simply holding some information
225 * about a specific host.
226 */
227 private class HeartbeatHolder {
228
229 /**
230 * Construct a new HeartbeatHolder.
231 */
232 public HeartbeatHolder(Register register) {
233 _register = register;
234 }
235
236 /**
237 * Set the time of the last heartbeat
238 */
239 public void setLastHeartbeat(long lastHeartbeat) {
240 _lastHeartbeat = lastHeartbeat;
241 }
242
243 /**
244 * Get the time of the last heartbeat
245 */
246 public long getLastHeartbeat() {
247 return _lastHeartbeat;
248 }
249
250 /**
251 * Get the Register
252 */
253 public Register getRegister() {
254 return _register;
255 }
256
257 /**
258 * last heartbeat time
259 */
260 private long _lastHeartbeat;
261
262 /**
263 * register ref
264 */
265 private Register _register;
266 }
267
268 /**
269 * This worker thread just checks all the hosts and then
270 * waits a period of time before doing it again. It sends
271 * Alerts as required.
272 */
273 private class HeartbeatWorker extends Thread {
274
275 /**
276 * The main run method of this worker thread. It simply
277 * checks through all the hosts it has stored, running
278 * the analyseHB method on each. It then removes any
279 * that have passed a FINAL, and waits a (configured)
280 * length of time before doing it again.
281 */
282 public void run() {
283 ConfigurationProxy cp = ConfigurationProxy.getInstance();
284 while(true) {
285 // this cycle period of this monitor's checks
286 int checkPeriod = 0;
287 try {
288 checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
289 } catch (PropertyNotFoundException e) {
290 checkPeriod = DEFAULT_CHECK_PERIOD;
291 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
292 } catch (NumberFormatException e) {
293 checkPeriod = DEFAULT_CHECK_PERIOD;
294 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
295 }
296
297 synchronized(Heartbeat__Monitor.this) {
298 // perform the checks (use HB hash, although they *should* be the same)
299 Iterator i = _hosts.keySet().iterator();
300 while(i.hasNext()) {
301 // get host
302 String source = (String) i.next();
303 // check it
304 boolean remove = analyseHB(source);
305 // remove it if it's passed a FINAL
306 if(remove) {
307 i.remove();
308 }
309 }
310 }
311
312 // wait a while
313 try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
314 }
315 }
316
317 /**
318 * Analyses a given host's state, and if need be generates
319 * a relevant Alert. Note that it also checks if the last
320 * alert sent is FINAL, in which case it returns true to
321 * indicate removal of this host.
322 *
323 * @param source the host to check
324 * @return whether this host can be deleted
325 */
326 private boolean analyseHB(String source) {
327 ConfigurationProxy cp = ConfigurationProxy.getInstance();
328 HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
329 Register reg = hbHolder.getRegister();
330
331 // get host's HB interval (seconds)
332 // this should always exist, thus we set to 0
333 int hostHBinterval = 0;
334 try {
335 hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
336 } catch (PropertyNotFoundException e) {
337 hostHBinterval = 0;
338 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
339 } catch (NumberFormatException e) {
340 hostHBinterval = 0;
341 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
342 }
343
344 // get host's last HB time (seconds)
345 long lastHeartbeat = hbHolder.getLastHeartbeat();
346 // time since last heartbeat (seconds)
347 long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
348 // time since (or until if negative) the expected heartbeat
349 long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
350
351 // best do a check in case the expected heartbeat is in the future
352 if(timeSinceExpectedHB < 0) {
353 timeSinceExpectedHB = 0;
354 }
355
356 // find out the threshold level we're at
357 int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
358
359 // process the alert
360 Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
361
362 if(reg.getLastAlertLevel() == Alert.alertFINAL) {
363 return true;
364 }
365 return false;
366 }
367 }
368 }