ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
Revision: 1.22
Committed: Sat May 18 18:16:00 2002 UTC (22 years ago) by tdb
Branch: MAIN
Changes since 1.21: +22 -3 lines
Log Message:
i-scream is now licensed under the GPL. I've added the GPL headers to every
source file, and put a full copy of the license in the appropriate places.
I think I've covered everything. This is going to be a mad commit ;)

File Contents

# User Rev Content
1 tdb 1.22 /*
2     * i-scream central monitoring system
3     * Copyright (C) 2000-2002 i-scream
4     *
5     * This program is free software; you can redistribute it and/or
6     * modify it under the terms of the GNU General Public License
7     * as published by the Free Software Foundation; either version 2
8     * of the License, or (at your option) any later version.
9     *
10     * This program is distributed in the hope that it will be useful,
11     * but WITHOUT ANY WARRANTY; without even the implied warranty of
12     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     * GNU General Public License for more details.
14     *
15     * You should have received a copy of the GNU General Public License
16     * along with this program; if not, write to the Free Software
17     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18     */
19    
20 tdb 1.1 //---PACKAGE DECLARATION---
21 tdb 1.20 package uk.org.iscream.cms.server.client.monitors;
22 tdb 1.1
23     //---IMPORTS---
24     import java.util.HashMap;
25     import java.util.Iterator;
26 tdb 1.21 import java.util.StringTokenizer;
27 tdb 1.20 import uk.org.iscream.cms.server.client.*;
28     import uk.org.iscream.cms.server.core.*;
29     import uk.org.iscream.cms.server.util.*;
30     import uk.org.iscream.cms.server.componentmanager.*;
31 tdb 1.1
32     /**
33 tdb 1.18 * This Monitor watches heartbeats.
34     * It generates an alert when a heartbeat that is expected
35     * does not arrive. Unlike all the other monitors, this one
36     * is driven by an event *not* occurring, rather than an
37     * event occurring. This means it must be actively checking
38     * for missing heartbeats, and thus has an extra inner class
39     * thread.
40 tdb 1.1 *
41 tdb 1.22 * @author $Author: tdb $
42     * @version $Id: Heartbeat__Monitor.java,v 1.21 2001/11/26 12:56:33 tdb Exp $
43 tdb 1.1 */
44 ajm 1.14 public class Heartbeat__Monitor extends MonitorSkeleton {
45 tdb 1.1
46     //---FINAL ATTRIBUTES---
47    
48     /**
49     * The current CVS revision of this class (reported by toString()).
50     */
51 tdb 1.22 public final String REVISION = "$Revision: 1.21 $";
52 tdb 1.1
53 tdb 1.18 /**
54     * A description of this monitor, returned by getDescription().
55     */
56 tdb 1.1 public final String DESC = "Monitors Heartbeats.";
57    
58 tdb 1.18 /**
59     * The default period, in seconds, between checks for old
60     * heartbeats; used when no valid value is configured.
61     */
62 tdb 1.3 public final int DEFAULT_CHECK_PERIOD = 60;
63    
64 tdb 1.1 //---STATIC METHODS---
65    
66     //---CONSTRUCTORS---
67 tdb 1.18
68     /**
69     * Constructs a new Heartbeat monitor, and starts off
70     * the worker thread.
71     */
72 tdb 1.2 public Heartbeat__Monitor() {
73 ajm 1.16 super();
74 tdb 1.21 createInitialHosts();
75 ajm 1.14 new HeartbeatWorker().start();
76 tdb 1.2 }
77    
78 tdb 1.1 //---PUBLIC METHODS---
79    
80 tdb 1.18 /**
81     * Analyse a packet of data. In this case, this will just
82     * register the fact that a heartbeat has arrived.
83     *
84     * @param packet The packet of data to analyse
85     */
86 ajm 1.14 public void analysePacket(XMLPacket packet) {
87     String source = packet.getParam("packet.attributes.machine_name");
88     if (!_hosts.containsKey(source)) {
89 tdb 1.9 synchronized(this) {
90 tdb 1.18 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
91 tdb 1.1 }
92     }
93 ajm 1.14 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
94     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
95 tdb 1.1 }
96    
97     /**
98     * Overrides the {@link java.lang.Object#toString() Object.toString()}
99     * method to provide clean logging (every class should have this).
100     *
101 tdb 1.20 * This uses the uk.org.iscream.cms.server.util.NameFormat class
102 tdb 1.1 * to format the toString()
103     *
104     * @return the name of this class and its CVS revision
105     */
106     public String toString() {
107     return FormatName.getName(
108     _name,
109     getClass().getName(),
110     REVISION);
111     }
112    
113     /**
114     * return the String representation of what the monitor does
115     */
116     public String getDescription(){
117     return DESC;
118     }
119    
120     //---PRIVATE METHODS---
121    
122 tdb 1.18 /**
123     * Checks whether the time since the last heartbeat
124     * is beyond the threshold(s).
125     *
126     * @param timeSinceLastHB a long time since the last heartbeat arrived
127     * @param reg the Register for this host
128     * @return the level which has been breached, if any
129     */
130 tdb 1.2 private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
131 tdb 1.1 for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
132     if (reg.getThreshold(thresholdLevel) != -1.0) {
133 tdb 1.2 if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
134 tdb 1.1 return thresholdLevel;
135     }
136     }
137     }
138 tdb 1.7 return Alert.thresholdNORMAL;
139 tdb 1.21 }
140    
141     /**
142     * Gets an initial list of hosts from the config
143     * and adds a fake set of heartbeats for them.
144     * If the hosts don't respond within the timeout
145     * period an alert will be raised.
146     *
147     * The effect of this is to allow us to know about
148     * hosts which weren't on when we started up, and
149     * will thus never have generated a heartbeat - yet
150     * will still want to know they're not responding.
151     */
152     private void createInitialHosts() {
153     // get the initial list of hosts from the config
154     String initialHosts = "";
155     try {
156     initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
157     } catch (PropertyNotFoundException e) {
158     // just leave initialHosts empty
159     _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
160     }
161    
162     // parse through the initial hosts adding them
163     StringTokenizer st = new StringTokenizer(initialHosts, ";");
164     while (st.hasMoreTokens()) {
165     String source = st.nextToken();
166     // check if they already exist, don't want to add them twice
167     if (!_hosts.containsKey(source)) {
168     synchronized(this) {
169     _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
170     }
171     }
172     HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
173     // set a "fake" heartbeat
174     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
175     }
176 tdb 1.1 }
177    
178     //---ACCESSOR/MUTATOR METHODS---
179 tdb 1.18
180     /**
181     * Returns a reference to the Queue we're getting data
182     * from. This is specific to this monitor.
183     *
184     * @return a reference to a Queue to get data from
185     */
186 ajm 1.14 protected Queue getQueue() {
187     return MonitorManager.getInstance().getHeartbeatQueue();
188     }
189    
190 tdb 1.1 //---ATTRIBUTES---
191    
192     /**
193     * The friendly name of this monitor.
194     * It is used as the configuration name when
195     * looking up this monitor's own properties,
196     * it is handed to every Register created for
197     * a host, and it is passed to FormatName by
198     * toString(). Per-host lookups use the name
199     * "Host." + source instead of this value.
200     */
201     private String _name = "Heartbeat";
202    
203     /**
204     * A reference to the configuration proxy in use
205     */
206     private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
207 tdb 1.18
208     /**
209     * A HashMap of hosts: machine name (String) to its HeartbeatHolder. Also used by the worker thread.
210     */
211 tdb 1.6 private HashMap _hosts = new HashMap();
212 tdb 1.18
213     /**
214     * A reference to the system logger.
215     */
216 tdb 1.15 private Logger _logger = ReferenceManager.getInstance().getLogger();
217 tdb 1.1
218     //---STATIC ATTRIBUTES---
219    
220     //---INNER CLASSES---
221 tdb 1.18
222     /**
223     * This inner class simply holding some information
224     * about a specific host.
225     */
226 tdb 1.1 private class HeartbeatHolder {
227    
228 tdb 1.18 /**
229     * Construct a new HeartbeatHolder.
230     */
231     public HeartbeatHolder(Register register) {
232     _register = register;
233 tdb 1.6 }
234    
235 tdb 1.18 /**
236     * Set the time of the last heartbeat
237     */
238 tdb 1.2 public void setLastHeartbeat(long lastHeartbeat) {
239 tdb 1.1 _lastHeartbeat = lastHeartbeat;
240     }
241    
242 tdb 1.18 /**
243     * Get the time of the last heartbeat
244     */
245 tdb 1.2 public long getLastHeartbeat() {
246 tdb 1.1 return _lastHeartbeat;
247     }
248    
249 tdb 1.18 /**
250     * Get the Register
251     */
252     public Register getRegister() {
253     return _register;
254 tdb 1.6 }
255    
256 tdb 1.18 /**
257     * last heartbeat time
258     */
259 tdb 1.2 private long _lastHeartbeat;
260 tdb 1.18
261     /**
262     * register ref
263     */
264     private Register _register;
265 ajm 1.14 }
266    
267 tdb 1.18 /**
268     * This worker thread just checks all the hosts and then
269     * waits a period of time before doing it again. It sends
270     * Alerts as required.
271     */
272 ajm 1.14 private class HeartbeatWorker extends Thread {
273    
274 tdb 1.18 /**
275     * The main run method of this worker thread. It simply
276     * checks through all the hosts it has stored, running
277     * the analyseHB method on each. It then removes any
278     * that have passed a FINAL, and waits a (configured)
279     * length of time before doing it again.
280     */
281 ajm 1.14 public void run() {
282     ConfigurationProxy cp = ConfigurationProxy.getInstance();
283     while(true) {
284     // this cycle period of this monitor's checks
285     int checkPeriod = 0;
286     try {
287     checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
288     } catch (PropertyNotFoundException e) {
289     checkPeriod = DEFAULT_CHECK_PERIOD;
290 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
291 ajm 1.14 } catch (NumberFormatException e) {
292     checkPeriod = DEFAULT_CHECK_PERIOD;
293 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
294 ajm 1.14 }
295    
296 tdb 1.19 synchronized(Heartbeat__Monitor.this) {
297 ajm 1.14 // perform the checks (use HB hash, although they *should* be the same)
298     Iterator i = _hosts.keySet().iterator();
299     while(i.hasNext()) {
300     // get host
301     String source = (String) i.next();
302     // check it
303     boolean remove = analyseHB(source);
304 tdb 1.18 // remove it if it's passed a FINAL
305 ajm 1.14 if(remove) {
306     i.remove();
307     }
308     }
309     }
310    
311     // wait a while
312     try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
313     }
314     }
315 ajm 1.16
316 tdb 1.18 /**
317     * Analyses a given host's state, and if need be generates
318     * a relevant Alert. Note that it also checks if the last
319     * alert sent is FINAL, in which case it returns true to
320     * indicate removal of this host.
321     *
322     * @param source the host to check
323     * @return whether this host can be deleted
324     */
325 ajm 1.16 private boolean analyseHB(String source) {
326     ConfigurationProxy cp = ConfigurationProxy.getInstance();
327     HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
328 tdb 1.18 Register reg = hbHolder.getRegister();
329 ajm 1.16
330     // get host's HB interval (seconds)
331     // this should always exist, thus we set to 0
332     int hostHBinterval = 0;
333     try {
334     hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
335     } catch (PropertyNotFoundException e) {
336     hostHBinterval = 0;
337 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
338 ajm 1.16 } catch (NumberFormatException e) {
339     hostHBinterval = 0;
340 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
341 ajm 1.16 }
342    
343     // get host's last HB time (seconds)
344     long lastHeartbeat = hbHolder.getLastHeartbeat();
345     // time since last heartbeat (seconds)
346     long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
347     // time since (or until if negative) the expected heartbeat
348     long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
349    
350     // best do a check in case the expected heartbeat is in the future
351     if(timeSinceExpectedHB < 0) {
352     timeSinceExpectedHB = 0;
353     }
354    
355     // find out the threshold level we're at
356     int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
357    
358     // process the alert
359 ajm 1.17 Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
360 ajm 1.16
361     if(reg.getLastAlertLevel() == Alert.alertFINAL) {
362     return true;
363     }
364     return false;
365     }
366 ajm 1.14 }
367 tdb 1.1 }