ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
Revision: 1.24
Committed: Wed Feb 5 16:43:45 2003 UTC (21 years, 3 months ago) by tdb
Branch: MAIN
Changes since 1.23: +4 -4 lines
Log Message:
Changed the server to use the external util package. Quite a minor change,
but does affect a lot of files.

File Contents

# User Rev Content
1 tdb 1.22 /*
2     * i-scream central monitoring system
3 tdb 1.23 * http://www.i-scream.org.uk
4 tdb 1.22 * Copyright (C) 2000-2002 i-scream
5     *
6     * This program is free software; you can redistribute it and/or
7     * modify it under the terms of the GNU General Public License
8     * as published by the Free Software Foundation; either version 2
9     * of the License, or (at your option) any later version.
10     *
11     * This program is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15     *
16     * You should have received a copy of the GNU General Public License
17     * along with this program; if not, write to the Free Software
18     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19     */
20    
21 tdb 1.1 //---PACKAGE DECLARATION---
22 tdb 1.20 package uk.org.iscream.cms.server.client.monitors;
23 tdb 1.1
24     //---IMPORTS---
25     import java.util.HashMap;
26     import java.util.Iterator;
27 tdb 1.21 import java.util.StringTokenizer;
28 tdb 1.20 import uk.org.iscream.cms.server.client.*;
29     import uk.org.iscream.cms.server.core.*;
30 tdb 1.24 import uk.org.iscream.cms.util.*;
31 tdb 1.20 import uk.org.iscream.cms.server.componentmanager.*;
32 tdb 1.1
33     /**
34 tdb 1.18 * This Monitor watches heartbeats.
35     * It generates an alert when a heartbeat that is expected
36     * does not arrive. Unlike all the other monitors, this one
37     * is driven by an event *not* occurring, rather than an
38     * event occurring. This means it must be actively checking
39     * for missing heartbeats, and thus has an extra inner class
40     * thread.
41 tdb 1.1 *
42 tdb 1.22 * @author $Author: tdb $
43 tdb 1.24 * @version $Id: Heartbeat__Monitor.java,v 1.23 2002/05/21 16:47:16 tdb Exp $
44 tdb 1.1 */
45 ajm 1.14 public class Heartbeat__Monitor extends MonitorSkeleton {
46 tdb 1.1
47     //---FINAL ATTRIBUTES---
48    
49     /**
50     * The current CVS revision of this class
51     */
52 tdb 1.24 public final String REVISION = "$Revision: 1.23 $";
53 tdb 1.1
54 tdb 1.18 /**
55     * A description of this monitor
56     */
57 tdb 1.1 public final String DESC = "Monitors Heartbeats.";
58    
59 tdb 1.18 /**
60     * The default (used if not configured) period at
61     * which to check for old heartbeats. (in seconds)
62     */
63 tdb 1.3 public final int DEFAULT_CHECK_PERIOD = 60;
64    
65 tdb 1.1 //---STATIC METHODS---
66    
67     //---CONSTRUCTORS---
68 tdb 1.18
69     /**
70     * Constructs a new Heartbeat monitor, and starts off
71     * the worker thread.
72     */
73 tdb 1.2 public Heartbeat__Monitor() {
74 ajm 1.16 super();
75 tdb 1.21 createInitialHosts();
76 ajm 1.14 new HeartbeatWorker().start();
77 tdb 1.2 }
78    
79 tdb 1.1 //---PUBLIC METHODS---
80    
81 tdb 1.18 /**
82     * Analyse a packet of data. In this case, this will just
83     * register the fact that a heartbeat has arrived.
84     *
85     * @param packet The packet of data to analyse
86     */
87 ajm 1.14 public void analysePacket(XMLPacket packet) {
88     String source = packet.getParam("packet.attributes.machine_name");
89     if (!_hosts.containsKey(source)) {
90 tdb 1.9 synchronized(this) {
91 tdb 1.18 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
92 tdb 1.1 }
93     }
94 ajm 1.14 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
95     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
96 tdb 1.1 }
97    
98     /**
99     * Overrides the {@link java.lang.Object#toString() Object.toString()}
100     * method to provide clean logging (every class should have this).
101     *
102 tdb 1.24 * This uses the uk.org.iscream.cms.util.NameFormat class
103 tdb 1.1 * to format the toString()
104     *
105     * @return the name of this class and its CVS revision
106     */
107     public String toString() {
108     return FormatName.getName(
109     _name,
110     getClass().getName(),
111     REVISION);
112     }
113    
114     /**
115     * return the String representation of what the monitor does
116     */
117     public String getDescription(){
118     return DESC;
119     }
120    
121     //---PRIVATE METHODS---
122    
123 tdb 1.18 /**
124     * Checks whether the time since the last heartbeat
125     * is beyond the threshold(s).
126     *
127     * @param timeSinceLastHB a long time since the last heartbeat arrived
128     * @param reg the Register for this host
129     * @return the level which has been breached, if any
130     */
131 tdb 1.2 private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
132 tdb 1.1 for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
133     if (reg.getThreshold(thresholdLevel) != -1.0) {
134 tdb 1.2 if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
135 tdb 1.1 return thresholdLevel;
136     }
137     }
138     }
139 tdb 1.7 return Alert.thresholdNORMAL;
140 tdb 1.21 }
141    
142     /**
143     * Gets an initial list of hosts from the config
144     * and adds a fake set of heartbeats for them.
145     * If the hosts don't respond within the timeout
146     * period an alert will be raised.
147     *
148     * The effect of this is to allow us to know about
149     * hosts which weren't on when we started up, and
150     * will thus never have generated a heartbeat - yet
151     * will still want to know they're not responding.
152     */
153     private void createInitialHosts() {
154     // get the initial list of hosts from the config
155     String initialHosts = "";
156     try {
157     initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
158     } catch (PropertyNotFoundException e) {
159     // just leave initialHosts empty
160     _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
161     }
162    
163     // parse through the initial hosts adding them
164     StringTokenizer st = new StringTokenizer(initialHosts, ";");
165     while (st.hasMoreTokens()) {
166     String source = st.nextToken();
167     // check if they already exist, don't want to add them twice
168     if (!_hosts.containsKey(source)) {
169     synchronized(this) {
170     _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
171     }
172     }
173     HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
174     // set a "fake" heartbeat
175     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
176     }
177 tdb 1.1 }
178    
179     //---ACCESSOR/MUTATOR METHODS---
180 tdb 1.18
181     /**
182     * Returns a reference to the Queue we're getting data
183     * from. This is specific to this monitor.
184     *
185     * @return a reference to a Queue to get data from
186     */
187 ajm 1.14 protected Queue getQueue() {
188     return MonitorManager.getInstance().getHeartbeatQueue();
189     }
190    
191 tdb 1.1 //---ATTRIBUTES---
192    
193     /**
194     * This is the friendly identifier of the
195     * component this class is running in.
196     * eg, a Filter may be called "filter1",
197     * If this class does not have an owning
198     * component, a name from the configuration
199     * can be placed here. This name could also
200     * be changed to null for utility classes.
201     */
202     private String _name = "Heartbeat";
203    
204     /**
205     * A reference to the configuration proxy in use
206     */
207     private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
208 tdb 1.18
209     /**
210     * A HashMap of hosts, with associated HeartbeatHolder's.
211     */
212 tdb 1.6 private HashMap _hosts = new HashMap();
213 tdb 1.18
214     /**
215     * A reference to the system logger.
216     */
217 tdb 1.15 private Logger _logger = ReferenceManager.getInstance().getLogger();
218 tdb 1.1
219     //---STATIC ATTRIBUTES---
220    
221     //---INNER CLASSES---
222 tdb 1.18
223     /**
224     * This inner class simply holding some information
225     * about a specific host.
226     */
227 tdb 1.1 private class HeartbeatHolder {
228    
229 tdb 1.18 /**
230     * Construct a new HeartbeatHolder.
231     */
232     public HeartbeatHolder(Register register) {
233     _register = register;
234 tdb 1.6 }
235    
236 tdb 1.18 /**
237     * Set the time of the last heartbeat
238     */
239 tdb 1.2 public void setLastHeartbeat(long lastHeartbeat) {
240 tdb 1.1 _lastHeartbeat = lastHeartbeat;
241     }
242    
243 tdb 1.18 /**
244     * Get the time of the last heartbeat
245     */
246 tdb 1.2 public long getLastHeartbeat() {
247 tdb 1.1 return _lastHeartbeat;
248     }
249    
250 tdb 1.18 /**
251     * Get the Register
252     */
253     public Register getRegister() {
254     return _register;
255 tdb 1.6 }
256    
257 tdb 1.18 /**
258     * last heartbeat time
259     */
260 tdb 1.2 private long _lastHeartbeat;
261 tdb 1.18
262     /**
263     * register ref
264     */
265     private Register _register;
266 ajm 1.14 }
267    
268 tdb 1.18 /**
269     * This worker thread just checks all the hosts and then
270     * waits a period of time before doing it again. It sends
271     * Alerts as required.
272     */
273 ajm 1.14 private class HeartbeatWorker extends Thread {
274    
275 tdb 1.18 /**
276     * The main run method of this worker thread. It simply
277     * checks through all the hosts it has stored, running
278     * the analyseHB method on each. It then removes any
279     * that have passed a FINAL, and waits a (configured)
280     * length of time before doing it again.
281     */
282 ajm 1.14 public void run() {
283     ConfigurationProxy cp = ConfigurationProxy.getInstance();
284     while(true) {
285     // this cycle period of this monitor's checks
286     int checkPeriod = 0;
287     try {
288     checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
289     } catch (PropertyNotFoundException e) {
290     checkPeriod = DEFAULT_CHECK_PERIOD;
291 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
292 ajm 1.14 } catch (NumberFormatException e) {
293     checkPeriod = DEFAULT_CHECK_PERIOD;
294 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
295 ajm 1.14 }
296    
297 tdb 1.19 synchronized(Heartbeat__Monitor.this) {
298 ajm 1.14 // perform the checks (use HB hash, although they *should* be the same)
299     Iterator i = _hosts.keySet().iterator();
300     while(i.hasNext()) {
301     // get host
302     String source = (String) i.next();
303     // check it
304     boolean remove = analyseHB(source);
305 tdb 1.18 // remove it if it's passed a FINAL
306 ajm 1.14 if(remove) {
307     i.remove();
308     }
309     }
310     }
311    
312     // wait a while
313     try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
314     }
315     }
316 ajm 1.16
317 tdb 1.18 /**
318     * Analyses a given host's state, and if need be generates
319     * a relevant Alert. Note that it also checks if the last
320     * alert sent is FINAL, in which case it returns true to
321     * indicate removal of this host.
322     *
323     * @param source the host to check
324     * @return whether this host can be deleted
325     */
326 ajm 1.16 private boolean analyseHB(String source) {
327     ConfigurationProxy cp = ConfigurationProxy.getInstance();
328     HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
329 tdb 1.18 Register reg = hbHolder.getRegister();
330 ajm 1.16
331     // get host's HB interval (seconds)
332     // this should always exist, thus we set to 0
333     int hostHBinterval = 0;
334     try {
335     hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
336     } catch (PropertyNotFoundException e) {
337     hostHBinterval = 0;
338 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
339 ajm 1.16 } catch (NumberFormatException e) {
340     hostHBinterval = 0;
341 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
342 ajm 1.16 }
343    
344     // get host's last HB time (seconds)
345     long lastHeartbeat = hbHolder.getLastHeartbeat();
346     // time since last heartbeat (seconds)
347     long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
348     // time since (or until if negative) the expected heartbeat
349     long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
350    
351     // best do a check in case the expected heartbeat is in the future
352     if(timeSinceExpectedHB < 0) {
353     timeSinceExpectedHB = 0;
354     }
355    
356     // find out the threshold level we're at
357     int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
358    
359     // process the alert
360 ajm 1.17 Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
361 ajm 1.16
362     if(reg.getLastAlertLevel() == Alert.alertFINAL) {
363     return true;
364     }
365     return false;
366     }
367 ajm 1.14 }
368 tdb 1.1 }