ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
Revision: 1.28
Committed: Sun Aug 1 10:40:45 2004 UTC (19 years, 9 months ago) by tdb
Branch: MAIN
CVS Tags: HEAD
Changes since 1.27: +3 -3 lines
Log Message:
Catch a lot of old URL's and update them. Also remove a couple of old files
that aren't used.

File Contents

# User Rev Content
1 tdb 1.22 /*
2     * i-scream central monitoring system
3 tdb 1.28 * http://www.i-scream.org
4 tdb 1.22 * Copyright (C) 2000-2002 i-scream
5     *
6     * This program is free software; you can redistribute it and/or
7     * modify it under the terms of the GNU General Public License
8     * as published by the Free Software Foundation; either version 2
9     * of the License, or (at your option) any later version.
10     *
11     * This program is distributed in the hope that it will be useful,
12     * but WITHOUT ANY WARRANTY; without even the implied warranty of
13     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     * GNU General Public License for more details.
15     *
16     * You should have received a copy of the GNU General Public License
17     * along with this program; if not, write to the Free Software
18     * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19     */
20    
21 tdb 1.1 //---PACKAGE DECLARATION---
22 tdb 1.20 package uk.org.iscream.cms.server.client.monitors;
23 tdb 1.1
24     //---IMPORTS---
25     import java.util.HashMap;
26     import java.util.Iterator;
27 tdb 1.21 import java.util.StringTokenizer;
28 tdb 1.20 import uk.org.iscream.cms.server.client.*;
29     import uk.org.iscream.cms.server.core.*;
30 tdb 1.24 import uk.org.iscream.cms.util.*;
31 tdb 1.20 import uk.org.iscream.cms.server.componentmanager.*;
32 tdb 1.1
33     /**
34 tdb 1.18 * This Monitor watches heartbeats.
35     * It generates an alert when a heartbeat that is expected
36     * does not arrive. Unlike all the other monitors, this one
37     * is driven by an event *not* occurring, rather than an
38     * event occurring. This means it must be actively checking
39     * for missing heartbeats, and thus has an extra inner class
40     * thread.
41 tdb 1.1 *
42 tdb 1.25 * This originally took "heartbeat" packets, but they've now
43     * been deprecated. Instead we look at UDP packets, or, rather
44     * the lack of them :-)
45     *
46 tdb 1.22 * @author $Author: tdb $
47 tdb 1.28 * @version $Id: Heartbeat__Monitor.java,v 1.27 2004/01/15 14:10:13 tdb Exp $
48 tdb 1.1 */
49 ajm 1.14 public class Heartbeat__Monitor extends MonitorSkeleton {
50 tdb 1.1
51     //---FINAL ATTRIBUTES---
52    
53     /**
54     * The current CVS revision of this class
55     */
56 tdb 1.28 public final String REVISION = "$Revision: 1.27 $";
57 tdb 1.1
58 tdb 1.18 /**
59     * A description of this monitor
60     */
61 tdb 1.1 public final String DESC = "Monitors Heartbeats.";
62    
63 tdb 1.18 /**
64     * The default (used if not configured) period at
65     * which to check for old heartbeats. (in seconds)
66     */
67 tdb 1.3 public final int DEFAULT_CHECK_PERIOD = 60;
68    
69 tdb 1.1 //---STATIC METHODS---
70    
71     //---CONSTRUCTORS---
72 tdb 1.18
73     /**
74     * Constructs a new Heartbeat monitor, and starts off
75     * the worker thread.
76     */
77 tdb 1.2 public Heartbeat__Monitor() {
78 ajm 1.16 super();
79 tdb 1.21 createInitialHosts();
80 ajm 1.14 new HeartbeatWorker().start();
81 tdb 1.2 }
82    
83 tdb 1.1 //---PUBLIC METHODS---
84    
85 tdb 1.18 /**
86     * Analyse a packet of data. In this case, this will just
87     * register the fact that a heartbeat has arrived.
88     *
89     * @param packet The packet of data to analyse
90     */
91 ajm 1.14 public void analysePacket(XMLPacket packet) {
92     String source = packet.getParam("packet.attributes.machine_name");
93 tdb 1.27 if(!checkBooleanConfig("Host." + source, "Monitor." + _name + ".enable")) {
94 tdb 1.26 return;
95     }
96 ajm 1.14 if (!_hosts.containsKey(source)) {
97 tdb 1.9 synchronized(this) {
98 tdb 1.18 _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
99 tdb 1.1 }
100     }
101 ajm 1.14 HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
102     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
103 tdb 1.1 }
104    
105     /**
106     * Overrides the {@link java.lang.Object#toString() Object.toString()}
107     * method to provide clean logging (every class should have this).
108     *
109 tdb 1.24 * This uses the uk.org.iscream.cms.util.NameFormat class
110 tdb 1.1 * to format the toString()
111     *
112     * @return the name of this class and its CVS revision
113     */
114     public String toString() {
115     return FormatName.getName(
116     _name,
117     getClass().getName(),
118     REVISION);
119     }
120    
121     /**
122     * return the String representation of what the monitor does
123     */
124     public String getDescription(){
125     return DESC;
126     }
127    
128     //---PRIVATE METHODS---
129    
130 tdb 1.18 /**
131     * Checks whether the time since the last heartbeat
132     * is beyond the threshold(s).
133     *
134     * @param timeSinceLastHB a long time since the last heartbeat arrived
135     * @param reg the Register for this host
136     * @return the level which has been breached, if any
137     */
138 tdb 1.2 private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
139 tdb 1.1 for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
140     if (reg.getThreshold(thresholdLevel) != -1.0) {
141 tdb 1.2 if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
142 tdb 1.1 return thresholdLevel;
143     }
144     }
145     }
146 tdb 1.7 return Alert.thresholdNORMAL;
147 tdb 1.21 }
148    
149     /**
150     * Gets an initial list of hosts from the config
151     * and adds a fake set of heartbeats for them.
152     * If the hosts don't respond within the timeout
153     * period an alert will be raised.
154     *
155     * The effect of this is to allow us to know about
156     * hosts which weren't on when we started up, and
157     * will thus never have generated a heartbeat - yet
158     * will still want to know they're not responding.
159     */
160     private void createInitialHosts() {
161     // get the initial list of hosts from the config
162     String initialHosts = "";
163     try {
164     initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
165     } catch (PropertyNotFoundException e) {
166     // just leave initialHosts empty
167     _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
168     }
169    
170     // parse through the initial hosts adding them
171     StringTokenizer st = new StringTokenizer(initialHosts, ";");
172     while (st.hasMoreTokens()) {
173     String source = st.nextToken();
174     // check if they already exist, don't want to add them twice
175     if (!_hosts.containsKey(source)) {
176     synchronized(this) {
177     _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
178     }
179     }
180     HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
181     // set a "fake" heartbeat
182     lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
183     }
184 tdb 1.1 }
185    
186     //---ACCESSOR/MUTATOR METHODS---
187 tdb 1.18
188     /**
189     * Returns a reference to the Queue we're getting data
190     * from. This is specific to this monitor.
191     *
192     * @return a reference to a Queue to get data from
193     */
194 ajm 1.14 protected Queue getQueue() {
195 tdb 1.25 return MonitorManager.getInstance().getDataQueue();
196 ajm 1.14 }
197    
198 tdb 1.1 //---ATTRIBUTES---
199    
200     /**
201     * This is the friendly identifier of the
202     * component this class is running in.
203     * eg, a Filter may be called "filter1",
204     * If this class does not have an owning
205     * component, a name from the configuration
206     * can be placed here. This name could also
207     * be changed to null for utility classes.
208     */
209     private String _name = "Heartbeat";
210    
211     /**
212     * A reference to the configuration proxy in use
213     */
214     private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
215 tdb 1.18
216     /**
217     * A HashMap of hosts, with associated HeartbeatHolder's.
218     */
219 tdb 1.6 private HashMap _hosts = new HashMap();
220 tdb 1.18
221     /**
222     * A reference to the system logger.
223     */
224 tdb 1.15 private Logger _logger = ReferenceManager.getInstance().getLogger();
225 tdb 1.1
226     //---STATIC ATTRIBUTES---
227    
228     //---INNER CLASSES---
229 tdb 1.18
230     /**
231     * This inner class simply holding some information
232     * about a specific host.
233     */
234 tdb 1.1 private class HeartbeatHolder {
235    
236 tdb 1.18 /**
237     * Construct a new HeartbeatHolder.
238     */
239     public HeartbeatHolder(Register register) {
240     _register = register;
241 tdb 1.6 }
242    
243 tdb 1.18 /**
244     * Set the time of the last heartbeat
245     */
246 tdb 1.2 public void setLastHeartbeat(long lastHeartbeat) {
247 tdb 1.1 _lastHeartbeat = lastHeartbeat;
248     }
249    
250 tdb 1.18 /**
251     * Get the time of the last heartbeat
252     */
253 tdb 1.2 public long getLastHeartbeat() {
254 tdb 1.1 return _lastHeartbeat;
255     }
256    
257 tdb 1.18 /**
258     * Get the Register
259     */
260     public Register getRegister() {
261     return _register;
262 tdb 1.6 }
263    
264 tdb 1.18 /**
265     * last heartbeat time
266     */
267 tdb 1.2 private long _lastHeartbeat;
268 tdb 1.18
269     /**
270     * register ref
271     */
272     private Register _register;
273 ajm 1.14 }
274    
275 tdb 1.18 /**
276     * This worker thread just checks all the hosts and then
277     * waits a period of time before doing it again. It sends
278     * Alerts as required.
279     */
280 ajm 1.14 private class HeartbeatWorker extends Thread {
281    
282 tdb 1.18 /**
283     * The main run method of this worker thread. It simply
284     * checks through all the hosts it has stored, running
285     * the analyseHB method on each. It then removes any
286     * that have passed a FINAL, and waits a (configured)
287     * length of time before doing it again.
288     */
289 ajm 1.14 public void run() {
290     ConfigurationProxy cp = ConfigurationProxy.getInstance();
291     while(true) {
292     // this cycle period of this monitor's checks
293     int checkPeriod = 0;
294     try {
295     checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
296     } catch (PropertyNotFoundException e) {
297     checkPeriod = DEFAULT_CHECK_PERIOD;
298 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
299 ajm 1.14 } catch (NumberFormatException e) {
300     checkPeriod = DEFAULT_CHECK_PERIOD;
301 tdb 1.19 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
302 ajm 1.14 }
303    
304 tdb 1.19 synchronized(Heartbeat__Monitor.this) {
305 ajm 1.14 // perform the checks (use HB hash, although they *should* be the same)
306     Iterator i = _hosts.keySet().iterator();
307     while(i.hasNext()) {
308     // get host
309     String source = (String) i.next();
310     // check it
311     boolean remove = analyseHB(source);
312 tdb 1.18 // remove it if it's passed a FINAL
313 ajm 1.14 if(remove) {
314     i.remove();
315     }
316     }
317     }
318    
319     // wait a while
320     try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
321     }
322     }
323 ajm 1.16
324 tdb 1.18 /**
325     * Analyses a given host's state, and if need be generates
326     * a relevant Alert. Note that it also checks if the last
327     * alert sent is FINAL, in which case it returns true to
328     * indicate removal of this host.
329     *
330     * @param source the host to check
331     * @return whether this host can be deleted
332     */
333 ajm 1.16 private boolean analyseHB(String source) {
334     ConfigurationProxy cp = ConfigurationProxy.getInstance();
335     HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
336 tdb 1.18 Register reg = hbHolder.getRegister();
337 ajm 1.16
338     // get host's HB interval (seconds)
339     // this should always exist, thus we set to 0
340     int hostHBinterval = 0;
341     try {
342 tdb 1.25 hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.UDPUpdateTime"));
343 ajm 1.16 } catch (PropertyNotFoundException e) {
344     hostHBinterval = 0;
345 tdb 1.25 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "UDPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
346 ajm 1.16 } catch (NumberFormatException e) {
347     hostHBinterval = 0;
348 tdb 1.25 _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous UDPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
349 ajm 1.16 }
350    
351     // get host's last HB time (seconds)
352     long lastHeartbeat = hbHolder.getLastHeartbeat();
353     // time since last heartbeat (seconds)
354     long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
355     // time since (or until if negative) the expected heartbeat
356     long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
357    
358     // best do a check in case the expected heartbeat is in the future
359     if(timeSinceExpectedHB < 0) {
360     timeSinceExpectedHB = 0;
361     }
362    
363     // find out the threshold level we're at
364     int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
365    
366     // process the alert
367 ajm 1.17 Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
368 ajm 1.16
369     if(reg.getLastAlertLevel() == Alert.alertFINAL) {
370     return true;
371     }
372     return false;
373     }
374 ajm 1.14 }
375 tdb 1.1 }