ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
Revision: 1.21
Committed: Mon Nov 26 12:56:33 2001 UTC (22 years, 6 months ago) by tdb
Branch: MAIN
Branch point for: SERVER_PIRCBOT
Changes since 1.20: +41 -2 lines
Log Message:
Completing this feature request:
  [ #479631 ] heartbeat monitor - starting list

Adds hosts defined in this configuration value to the heartbeat monitor on
startup.

Monitor.Heartbeat.initialHosts=raptor.ukc.ac.uk;myrtle.ukc.ac.uk

This means the Heartbeat Monitor will generate heartbeat alerts if these
hosts don't send in a heartbeat within the expected time. This is useful in
situations where the i-scream server comes up after the hosts have gone
down, which usually wouldn't have generated an alert - because the server
would never have seen the hosts to know they're gone.

File Contents

# User Rev Content
1 tdb 1.1 //---PACKAGE DECLARATION---
2 tdb 1.20 package uk.org.iscream.cms.server.client.monitors;
3 tdb 1.1
4     //---IMPORTS---
5     import java.util.HashMap;
6     import java.util.Iterator;
7 tdb 1.21 import java.util.StringTokenizer;
8 tdb 1.20 import uk.org.iscream.cms.server.client.*;
9     import uk.org.iscream.cms.server.core.*;
10     import uk.org.iscream.cms.server.util.*;
11     import uk.org.iscream.cms.server.componentmanager.*;
12 tdb 1.1
13     /**
14 tdb 1.18 * This Monitor watches heartbeats.
15     * It generates an alert when a heartbeat that is expected
16     * does not arrive. Unlike all the other monitors, this one
17     * is driven by an event *not* occurring, rather than an
18     * event occurring. This means it must be actively checking
19     * for missing heartbeats, and thus has an extra inner class
20     * thread.
21 tdb 1.1 *
22 tdb 1.19 * @author $Author: tdb1 $
23 tdb 1.21 * @version $Id: Heartbeat__Monitor.java,v 1.20 2001/05/29 17:02:34 tdb1 Exp $
24 tdb 1.1 */
25 ajm 1.14 public class Heartbeat__Monitor extends MonitorSkeleton {
26 tdb 1.1
27     //---FINAL ATTRIBUTES---
28    
29     /**
30     * The current CVS revision of this class
31     */
32 tdb 1.21 public final String REVISION = "$Revision: 1.20 $";
33 tdb 1.1
34 tdb 1.18 /**
35     * A description of this monitor
36     */
37 tdb 1.1 public final String DESC = "Monitors Heartbeats.";
38    
39 tdb 1.18 /**
40     * The default (used if not configured) period at
41     * which to check for old heartbeats. (in seconds)
42     */
43 tdb 1.3 public final int DEFAULT_CHECK_PERIOD = 60;
44    
45 tdb 1.1 //---STATIC METHODS---
46    
47     //---CONSTRUCTORS---
48 tdb 1.18
49     /**
50     * Constructs a new Heartbeat monitor, and starts off
51     * the worker thread.
52     */
53 tdb 1.2 public Heartbeat__Monitor() {
54 ajm 1.16 super();
55 tdb 1.21 createInitialHosts();
56 ajm 1.14 new HeartbeatWorker().start();
57 tdb 1.2 }
58    
59 tdb 1.1 //---PUBLIC METHODS---
60    
61 tdb 1.18 /**
62     * Analyse a packet of data. In this case, this will just
63     * register the fact that a heartbeat has arrived.
64     *
65     * @param packet The packet of data to analyse
66     */
67 ajm 1.14 public void analysePacket(XMLPacket packet) {
68     String source = packet.getParam("packet.attributes.machine_name");
69     if (!_hosts.containsKey(source)) {
70 tdb 1.9 synchronized(this) {
71 tdb 1.18 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
72 tdb 1.1 }
73     }
74 ajm 1.14 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
75     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
76 tdb 1.1 }
77    
78     /**
79     * Overrides the {@link java.lang.Object#toString() Object.toString()}
80     * method to provide clean logging (every class should have this).
81     *
82 tdb 1.20 * This uses the uk.org.iscream.cms.server.util.NameFormat class
83 tdb 1.1 * to format the toString()
84     *
85     * @return the name of this class and its CVS revision
86     */
87     public String toString() {
88     return FormatName.getName(
89     _name,
90     getClass().getName(),
91     REVISION);
92     }
93    
94     /**
95     * return the String representation of what the monitor does
96     */
97     public String getDescription(){
98     return DESC;
99     }
100    
101     //---PRIVATE METHODS---
102    
103 tdb 1.18 /**
104     * Checks whether the time since the last heartbeat
105     * is beyond the threshold(s).
106     *
107     * @param timeSinceLastHB a long time since the last heartbeat arrived
108     * @param reg the Register for this host
109     * @return the level which has been breached, if any
110     */
111 tdb 1.2 private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
112 tdb 1.1 for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
113     if (reg.getThreshold(thresholdLevel) != -1.0) {
114 tdb 1.2 if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
115 tdb 1.1 return thresholdLevel;
116     }
117     }
118     }
119 tdb 1.7 return Alert.thresholdNORMAL;
120 tdb 1.21 }
121    
122     /**
123     * Gets an initial list of hosts from the config
124     * and adds a fake set of heartbeats for them.
125     * If the hosts don't respond within the timeout
126     * period an alert will be raised.
127     *
128     * The effect of this is to allow us to know about
129     * hosts which weren't on when we started up, and
130     * will thus never have generated a heartbeat - yet
131     * will still want to know they're not responding.
132     */
133     private void createInitialHosts() {
134     // get the initial list of hosts from the config
135     String initialHosts = "";
136     try {
137     initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
138     } catch (PropertyNotFoundException e) {
139     // just leave initialHosts empty
140     _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
141     }
142    
143     // parse through the initial hosts adding them
144     StringTokenizer st = new StringTokenizer(initialHosts, ";");
145     while (st.hasMoreTokens()) {
146     String source = st.nextToken();
147     // check if they already exist, don't want to add them twice
148     if (!_hosts.containsKey(source)) {
149     synchronized(this) {
150     _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
151     }
152     }
153     HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
154     // set a "fake" heartbeat
155     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
156     }
157 tdb 1.1 }
158    
159     //---ACCESSOR/MUTATOR METHODS---
160 tdb 1.18
161     /**
162     * Returns a reference to the Queue we're getting data
163     * from. This is specific to this monitor.
164     *
165     * @return a reference to a Queue to get data from
166     */
167 ajm 1.14 protected Queue getQueue() {
168     return MonitorManager.getInstance().getHeartbeatQueue();
169     }
170    
171 tdb 1.1 //---ATTRIBUTES---
172    
173     /**
174     * This is the friendly identifier of the
175     * component this class is running in.
176     * eg, a Filter may be called "filter1",
177     * If this class does not have an owning
178     * component, a name from the configuration
179     * can be placed here. This name could also
180     * be changed to null for utility classes.
181     */
182     private String _name = "Heartbeat";
183    
184     /**
185     * A reference to the configuration proxy in use
186     */
187     private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
188 tdb 1.18
189     /**
190     * A HashMap of hosts, with associated HeartbeatHolder's.
191     */
192 tdb 1.6 private HashMap _hosts = new HashMap();
193 tdb 1.18
194     /**
195     * A reference to the system logger.
196     */
197 tdb 1.15 private Logger _logger = ReferenceManager.getInstance().getLogger();
198 tdb 1.1
199     //---STATIC ATTRIBUTES---
200    
201     //---INNER CLASSES---
202 tdb 1.18
203     /**
204     * This inner class simply holding some information
205     * about a specific host.
206     */
207 tdb 1.1 private class HeartbeatHolder {
208    
209 tdb 1.18 /**
210     * Construct a new HeartbeatHolder.
211     */
212     public HeartbeatHolder(Register register) {
213     _register = register;
214 tdb 1.6 }
215    
216 tdb 1.18 /**
217     * Set the time of the last heartbeat
218     */
219 tdb 1.2 public void setLastHeartbeat(long lastHeartbeat) {
220 tdb 1.1 _lastHeartbeat = lastHeartbeat;
221     }
222    
223 tdb 1.18 /**
224     * Get the time of the last heartbeat
225     */
226 tdb 1.2 public long getLastHeartbeat() {
227 tdb 1.1 return _lastHeartbeat;
228     }
229    
230 tdb 1.18 /**
231     * Get the Register
232     */
233     public Register getRegister() {
234     return _register;
235 tdb 1.6 }
236    
237 tdb 1.18 /**
238     * last heartbeat time
239     */
240 tdb 1.2 private long _lastHeartbeat;
241 tdb 1.18
242     /**
243     * register ref
244     */
245     private Register _register;
246 ajm 1.14 }
247    
248 tdb 1.18 /**
249     * This worker thread just checks all the hosts and then
250     * waits a period of time before doing it again. It sends
251     * Alerts as required.
252     */
253 ajm 1.14 private class HeartbeatWorker extends Thread {
254    
255 tdb 1.18 /**
256     * The main run method of this worker thread. It simply
257     * checks through all the hosts it has stored, running
258     * the analyseHB method on each. It then removes any
259     * that have passed a FINAL, and waits a (configured)
260     * length of time before doing it again.
261     */
262 ajm 1.14 public void run() {
263     ConfigurationProxy cp = ConfigurationProxy.getInstance();
264     while(true) {
265     // this cycle period of this monitor's checks
266     int checkPeriod = 0;
267     try {
268     checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
269     } catch (PropertyNotFoundException e) {
270     checkPeriod = DEFAULT_CHECK_PERIOD;
271 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
272 ajm 1.14 } catch (NumberFormatException e) {
273     checkPeriod = DEFAULT_CHECK_PERIOD;
274 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
275 ajm 1.14 }
276    
277 tdb 1.19 synchronized(Heartbeat__Monitor.this) {
278 ajm 1.14 // perform the checks (use HB hash, although they *should* be the same)
279     Iterator i = _hosts.keySet().iterator();
280     while(i.hasNext()) {
281     // get host
282     String source = (String) i.next();
283     // check it
284     boolean remove = analyseHB(source);
285 tdb 1.18 // remove it if it's passed a FINAL
286 ajm 1.14 if(remove) {
287     i.remove();
288     }
289     }
290     }
291    
292     // wait a while
293     try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
294     }
295     }
296 ajm 1.16
297 tdb 1.18 /**
298     * Analyses a given host's state, and if need be generates
299     * a relevant Alert. Note that it also checks if the last
300     * alert sent is FINAL, in which case it returns true to
301     * indicate removal of this host.
302     *
303     * @param source the host to check
304     * @return whether this host can be deleted
305     */
306 ajm 1.16 private boolean analyseHB(String source) {
307     ConfigurationProxy cp = ConfigurationProxy.getInstance();
308     HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
309 tdb 1.18 Register reg = hbHolder.getRegister();
310 ajm 1.16
311     // get host's HB interval (seconds)
312     // this should always exist, thus we set to 0
313     int hostHBinterval = 0;
314     try {
315     hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
316     } catch (PropertyNotFoundException e) {
317     hostHBinterval = 0;
318 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
319 ajm 1.16 } catch (NumberFormatException e) {
320     hostHBinterval = 0;
321 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
322 ajm 1.16 }
323    
324     // get host's last HB time (seconds)
325     long lastHeartbeat = hbHolder.getLastHeartbeat();
326     // time since last heartbeat (seconds)
327     long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
328     // time since (or until if negative) the expected heartbeat
329     long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
330    
331     // best do a check in case the expected heartbeat is in the future
332     if(timeSinceExpectedHB < 0) {
333     timeSinceExpectedHB = 0;
334     }
335    
336     // find out the threshold level we're at
337     int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
338    
339     // process the alert
340 ajm 1.17 Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
341 ajm 1.16
342     if(reg.getLastAlertLevel() == Alert.alertFINAL) {
343     return true;
344     }
345     return false;
346     }
347 ajm 1.14 }
348 tdb 1.1 }