ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
(Generate patch)

Comparing projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java (file contents):
Revision 1.11 by tdb, Wed Mar 7 21:38:14 2001 UTC vs.
Revision 1.26 by tdb, Thu Jan 15 13:41:47 2004 UTC

# Line 1 | Line 1
1 + /*
2 + * i-scream central monitoring system
3 + * http://www.i-scream.org.uk
4 + * Copyright (C) 2000-2002 i-scream
5 + *
6 + * This program is free software; you can redistribute it and/or
7 + * modify it under the terms of the GNU General Public License
8 + * as published by the Free Software Foundation; either version 2
9 + * of the License, or (at your option) any later version.
10 + *
11 + * This program is distributed in the hope that it will be useful,
12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 + * GNU General Public License for more details.
15 + *
16 + * You should have received a copy of the GNU General Public License
17 + * along with this program; if not, write to the Free Software
18 + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19 + */
20 +
21   //---PACKAGE DECLARATION---
22 < package uk.ac.ukc.iscream.client.monitors;
22 > package uk.org.iscream.cms.server.client.monitors;
23  
24   //---IMPORTS---
25   import java.util.HashMap;
26   import java.util.Iterator;
27 < import uk.ac.ukc.iscream.client.*;
28 < import uk.ac.ukc.iscream.core.*;
29 < import uk.ac.ukc.iscream.util.*;
30 < import uk.ac.ukc.iscream.componentmanager.*;
27 > import java.util.StringTokenizer;
28 > import uk.org.iscream.cms.server.client.*;
29 > import uk.org.iscream.cms.server.core.*;
30 > import uk.org.iscream.cms.util.*;
31 > import uk.org.iscream.cms.server.componentmanager.*;
32  
33   /**
34 < * This Monitor watches heartbeats
34 > * This Monitor watches heartbeats.
35 > * It generates an alert when a heartbeat that is expected
36 > * does not arrive. Unlike all the other monitors, this one
37 > * is driven by an event *not* occurring, rather than an
38 > * event occurring. This means it must be actively checking
39 > * for missing heartbeats, and thus has an extra inner class
40 > * thread.
41   *
42 + * This originally took "heartbeat" packets, but they've now
43 + * been deprecated. Instead we look at UDP packets, or, rather
44 + * the lack of them :-)
45 + *
46   * @author  $Author$
47   * @version $Id$
48   */
49 < public class Heartbeat__Monitor extends MonitorSkeleton implements Runnable {
49 > public class Heartbeat__Monitor extends MonitorSkeleton {
50  
51   //---FINAL ATTRIBUTES---
52  
# Line 24 | Line 55 | public class Heartbeat__Monitor extends MonitorSkeleto
55       */
56      public final String REVISION = "$Revision$";
57      
58 +    /**
59 +     * A description of this monitor
60 +     */
61      public final String DESC = "Monitors Heartbeats.";
62      
63 +    /**
64 +     * The default (used if not configured) period at
65 +     * which to check for old heartbeats. (in seconds)
66 +     */
67      public final int DEFAULT_CHECK_PERIOD = 60;
68      
69   //---STATIC METHODS---
70  
71   //---CONSTRUCTORS---
72 <
72 >    
73 >    /**
74 >     * Constructs a new Heartbeat monitor, and starts off
75 >     * the worker thread.
76 >     */
77      public Heartbeat__Monitor() {
78 <        new Thread(this).start();
78 >        super();
79 >        createInitialHosts();
80 >        new HeartbeatWorker().start();
81      }
82  
83   //---PUBLIC METHODS---
84      
85 <    public void run() {
86 <        ConfigurationProxy cp = ConfigurationProxy.getInstance();
87 <        while(true) {
88 <            // this cycle period of this monitor's checks
89 <            int checkPeriod = 0;
90 <            try {
91 <                checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
92 <            } catch (PropertyNotFoundException e) {
93 <                checkPeriod = DEFAULT_CHECK_PERIOD;
94 <                _logger.write(toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
95 <            } catch (NumberFormatException e) {
96 <                checkPeriod = DEFAULT_CHECK_PERIOD;
53 <                _logger.write(toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
54 <            }
55 <            
85 >    /**
86 >     * Analyse a packet of data. In this case, this will just
87 >     * register the fact that a heartbeat has arrived.
88 >     *
89 >     * @param packet The packet of data to analyse
90 >     */
91 >    public void analysePacket(XMLPacket packet) {
92 >        String source = packet.getParam("packet.attributes.machine_name");
93 >        if(!checkBooleanConfig(source, "Monitor." + _name + ".enable")) {
94 >            return;
95 >        }
96 >        if (!_hosts.containsKey(source)) {
97              synchronized(this) {
98 <                // perform the checks (use HB hash, although they *should* be the same)
58 <                Iterator i = _hosts.keySet().iterator();
59 <                while(i.hasNext()) {
60 <                    // get host
61 <                    String source = (String) i.next();
62 <                    // check it
63 <                    boolean remove = analyseHB(source);
64 <                    if(remove) {
65 <                        i.remove();
66 <                    }
67 <                }
98 >                _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
99              }
69            
70            // wait a while
71            try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
100          }
101 +        HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
102 +        lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
103      }
104      
75    public void analysePacket(XMLPacket packet) {
76        if (packet.getParam("packet.attributes.type").equals("heartbeat")) {
77            String source = packet.getParam("packet.attributes.machine_name");
78            if (!_hosts.containsKey(source)) {
79                synchronized(this) {
80                    HashMap registerHash = new HashMap();
81                    registerHash.put(source, new Register(source, _name, 1));
82                    _hosts.put(source, new HeartbeatHolder(registerHash));
83                }
84            }
85            HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
86            lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
87        }
88    }
89    
105      /**
106       * Overrides the {@link java.lang.Object#toString() Object.toString()}
107       * method to provide clean logging (every class should have this).
108       *
109 <     * This uses the uk.ac.ukc.iscream.util.NameFormat class
109 >     * This uses the uk.org.iscream.cms.util.NameFormat class
110       * to format the toString()
111       *
112       * @return the name of this class and its CVS revision
# Line 111 | Line 126 | public class Heartbeat__Monitor extends MonitorSkeleto
126      }
127  
128   //---PRIVATE METHODS---
114
115    private boolean analyseHB(String source) {
116        ConfigurationProxy cp = ConfigurationProxy.getInstance();
117        HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
118        Register reg = (Register) ((HashMap) hbHolder.getRegisterHash()).get(source);
119        
120        // get host's HB interval (seconds)
121        // this should always exist, thus we set to 0
122        int hostHBinterval = 0;
123        try {
124            hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
125        } catch (PropertyNotFoundException e) {
126            hostHBinterval = 0;
127            _logger.write(toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
128        } catch (NumberFormatException e) {
129            hostHBinterval = 0;
130            _logger.write(toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
131        }
132        
133        // get host's last HB time (seconds)
134        long lastHeartbeat = hbHolder.getLastHeartbeat();
135        // time since last heartbeat (seconds)
136        long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
137        // time since (or until if negative) the expected heartbeat
138        long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
139        
140        // best do a check in case the expected heartbeat is in the future
141        if(timeSinceExpectedHB < 0) {
142            timeSinceExpectedHB = 0;
143        }
144        
145        // find out the threshold level we're at
146        int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
147        
148        // process the alert
149        processAlert(newThreshold, 0, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
150        
151        if(reg.getLastAlertLevel(0) == Alert.alertFINAL) {
152            return true;
153        }
154        return false;
155    }
129      
130 +    /**
131 +     * Checks whether the time since the last heartbeat
132 +     * is beyond the threshold(s).
133 +     *
134 +     * @param timeSinceLastHB a long time since the last heartbeat arrived
135 +     * @param reg the Register for this host
136 +     * @return the level which has been breached, if any
137 +     */
138      private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
139          for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
140              if (reg.getThreshold(thresholdLevel) != -1.0) {
# Line 164 | Line 145 | public class Heartbeat__Monitor extends MonitorSkeleto
145          }
146          return Alert.thresholdNORMAL;
147      }
148 +    
149 +    /**
150 +     * Gets an initial list of hosts from the config
151 +     * and adds a fake set of heartbeats for them.
152 +     * If the hosts don't respond within the timeout
153 +     * period an alert will be raised.
154 +     *
155 +     * The effect of this is to allow us to know about
156 +     * hosts which weren't on when we started up, and
157 +     * will thus never have generated a heartbeat - yet
158 +     * we will still want to know they're not responding.
159 +     */
160 +    private void createInitialHosts() {
161 +        // get the initial list of hosts from the config
162 +        String initialHosts = "";
163 +        try {
164 +            initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
165 +        } catch (PropertyNotFoundException e) {
166 +            // just leave initialHosts empty
167 +            _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
168 +        }
169 +        
170 +        // parse through the initial hosts adding them
171 +        StringTokenizer st = new StringTokenizer(initialHosts, ";");
172 +        while (st.hasMoreTokens()) {
173 +            String source = st.nextToken();
174 +            // check if they already exist, don't want to add them twice
175 +            if (!_hosts.containsKey(source)) {
176 +                synchronized(this) {
177 +                    _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
178 +                }
179 +            }
180 +            HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
181 +            // set a "fake" heartbeat
182 +            lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
183 +        }
184 +    }
185  
186   //---ACCESSOR/MUTATOR METHODS---
187 <
187 >    
188 >    /**
189 >     * Returns a reference to the Queue we're getting data
190 >     * from. This is specific to this monitor.
191 >     *
192 >     * @return a reference to a Queue to get data from
193 >     */
194 >    protected Queue getQueue() {
195 >        return MonitorManager.getInstance().getDataQueue();
196 >    }
197 >    
198   //---ATTRIBUTES---
199  
200      /**
# Line 184 | Line 212 | public class Heartbeat__Monitor extends MonitorSkeleto
212       * A reference to the configuration proxy in use
213       */
214      private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
215 <
215 >    
216 >    /**
217 >     * A HashMap of hosts, with associated HeartbeatHolders.
218 >     */
219      private HashMap _hosts = new HashMap();
220 +    
221 +    /**
222 +     * A reference to the system logger.
223 +     */
224 +    private Logger _logger = ReferenceManager.getInstance().getLogger();
225  
226   //---STATIC ATTRIBUTES---
227  
228   //---INNER CLASSES---
229 <
229 >    
230 >    /**
231 >     * This inner class simply holds some information
232 >     * about a specific host.
233 >     */
234      private class HeartbeatHolder {
235          
236 <        public HeartbeatHolder(HashMap registerHash) {
237 <            _registerHash = registerHash;
236 >        /**
237 >         * Construct a new HeartbeatHolder.
238 >         */
239 >        public HeartbeatHolder(Register register) {
240 >            _register = register;
241          }
242          
243 +        /**
244 +         * Set the time of the last heartbeat
245 +         */
246          public void setLastHeartbeat(long lastHeartbeat) {
247              _lastHeartbeat = lastHeartbeat;
248          }
249          
250 +        /**
251 +         * Get the time of the last heartbeat
252 +         */
253          public long getLastHeartbeat() {
254              return _lastHeartbeat;
255          }
256          
257 <        public HashMap getRegisterHash() {
258 <            return _registerHash;
257 >        /**
258 >         * Get the Register
259 >         */
260 >        public Register getRegister() {
261 >            return _register;
262          }
263          
264 +        /**
265 +         * last heartbeat time
266 +         */
267          private long _lastHeartbeat;
268 <        private HashMap _registerHash;
269 <    }  
270 <
268 >        
269 >        /**
270 >         * register ref
271 >         */
272 >        private Register _register;
273 >    }
274 >    
275 >    /**
276 >     * This worker thread just checks all the hosts and then
277 >     * waits a period of time before doing it again. It sends
278 >     * Alerts as required.
279 >     */
280 >    private class HeartbeatWorker extends Thread {
281 >        
282 >        /**
283 >         * The main run method of this worker thread. It simply
284 >         * checks through all the hosts it has stored, running
285 >         * the analyseHB method on each. It then removes any
286 >         * that have passed a FINAL, and waits a (configured)
287 >         * length of time before doing it again.
288 >         */
289 >        public void run() {
290 >            ConfigurationProxy cp = ConfigurationProxy.getInstance();
291 >            while(true) {
292 >                // the cycle period of this monitor's checks
293 >                int checkPeriod = 0;
294 >                try {
295 >                    checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
296 >                } catch (PropertyNotFoundException e) {
297 >                    checkPeriod = DEFAULT_CHECK_PERIOD;
298 >                    _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
299 >                } catch (NumberFormatException e) {
300 >                    checkPeriod = DEFAULT_CHECK_PERIOD;
301 >                    _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
302 >                }
303 >                
304 >                synchronized(Heartbeat__Monitor.this) {
305 >                    // perform the checks (use HB hash, although they *should* be the same)
306 >                    Iterator i = _hosts.keySet().iterator();
307 >                    while(i.hasNext()) {
308 >                        // get host
309 >                        String source = (String) i.next();
310 >                        // check it
311 >                        boolean remove = analyseHB(source);
312 >                        // remove it if it's passed a FINAL
313 >                        if(remove) {
314 >                            i.remove();
315 >                        }
316 >                    }
317 >                }
318 >                
319 >                // wait a while
320 >                try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
321 >            }
322 >        }
323 >        
324 >        /**
325 >         * Analyses a given host's state, and if need be generates
326 >         * a relevant Alert. Note that it also checks if the last
327 >         * alert sent is FINAL, in which case it returns true to
328 >         * indicate removal of this host.
329 >         *
330 >         * @param source the host to check
331 >         * @return whether this host can be deleted
332 >         */
333 >        private boolean analyseHB(String source) {
334 >            ConfigurationProxy cp = ConfigurationProxy.getInstance();
335 >            HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
336 >            Register reg = hbHolder.getRegister();
337 >            
338 >            // get host's HB interval (seconds)
339 >            // this should always exist, thus we set to 0
340 >            int hostHBinterval = 0;
341 >            try {
342 >                hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.UDPUpdateTime"));
343 >            } catch (PropertyNotFoundException e) {
344 >                hostHBinterval = 0;
345 >                _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "UDPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
346 >            } catch (NumberFormatException e) {
347 >                hostHBinterval = 0;
348 >                _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous UDPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
349 >            }
350 >            
351 >            // get host's last HB time (seconds)
352 >            long lastHeartbeat = hbHolder.getLastHeartbeat();
353 >            // time since last heartbeat (seconds)
354 >            long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
355 >            // time since (or until if negative) the expected heartbeat
356 >            long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
357 >            
358 >            // best do a check in case the expected heartbeat is in the future
359 >            if(timeSinceExpectedHB < 0) {
360 >                timeSinceExpectedHB = 0;
361 >            }
362 >            
363 >            // find out the threshold level we're at
364 >            int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
365 >            
366 >            // process the alert
367 >            Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
368 >            
369 >            if(reg.getLastAlertLevel() == Alert.alertFINAL) {
370 >                return true;
371 >            }
372 >            return false;
373 >        }
374 >    }
375   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines