ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
(Generate patch)

Comparing projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java (file contents):
Revision 1.1 by tdb, Mon Mar 5 13:30:34 2001 UTC vs.
Revision 1.19 by tdb, Mon Mar 26 18:07:11 2001 UTC

# Line 1 | Line 1
1   //---PACKAGE DECLARATION---
2 < package uk.ac.ukc.iscream.client.monitors;
2 > package uk.org.iscream.client.monitors;
3  
4   //---IMPORTS---
5   import java.util.HashMap;
6   import java.util.Iterator;
7 < import uk.ac.ukc.iscream.client.*;
8 < import uk.ac.ukc.iscream.core.*;
9 < import uk.ac.ukc.iscream.util.*;
10 < import uk.ac.ukc.iscream.componentmanager.*;
7 > import uk.org.iscream.client.*;
8 > import uk.org.iscream.core.*;
9 > import uk.org.iscream.util.*;
10 > import uk.org.iscream.componentmanager.*;
11  
12   /**
13 < * This Monitor watches heartbeats
13 > * This Monitor watches heartbeats.
14 > * It generates an alert when a heartbeat that is expected
15 > * does not arrive. Unlike all the other monitors, this one
16 > * is driven by an event *not* occurring, rather than an
17 > * event occurring. This means it must be actively checking
18 > * for missing heartbeats, and thus has an extra inner class
19 > * thread.
20   *
21   * @author  $Author$
22   * @version $Id$
23   */
24 < public class Heartbeat__Monitor extends Thread implements PluginMonitor {
24 > public class Heartbeat__Monitor extends MonitorSkeleton {
25  
26   //---FINAL ATTRIBUTES---
27  
# Line 24 | Line 30 | public class Heartbeat__Monitor extends Thread impleme
30       */
31      public final String REVISION = "$Revision$";
32      
33 +    /**
34 +     * A description of this monitor
35 +     */
36      public final String DESC = "Monitors Heartbeats.";
37      
38 +    /**
39 +     * The default (used if not configured) period at
40 +     * which to check for old heartbeats. (in seconds)
41 +     */
42 +    public final int DEFAULT_CHECK_PERIOD = 60;
43 +    
44   //---STATIC METHODS---
45  
46   //---CONSTRUCTORS---
32
33 //---PUBLIC METHODS---
47      
48 <    public void run() {
49 <        ConfigurationProxy cp = ConfigurationProxy.getInstance();
50 <        while(true) {
51 <            // this cycle period could be done better, maybe ?
52 <            int checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
53 <            
54 <            // perform the checks (use HB hash, although they *should* be the same)
42 <            Iterator i = _hostsHB.keySet().iterator();
43 <            while(i.hasNext()) {
44 <                // get host
45 <                String source = (String) i.next();
46 <                // check it
47 <                analyseHB(source);
48 <            }
49 <            
50 <            // wait a while
51 <            try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
52 <        }
48 >    /**
49 >     * Constructs a new Heartbeat monitor, and starts off
50 >     * the worker thread.
51 >     */
52 >    public Heartbeat__Monitor() {
53 >        super();
54 >        new HeartbeatWorker().start();
55      }
54    
55    // only use attribute num 0 :)
56    public void analyseHB(String source) {
57        ConfigurationProxy cp = ConfigurationProxy.getInstance();
58        Register reg = (Register) _hostsReg.get(source);
59        
60        // get host's HB interval (seconds)
61        int hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "TCPUpdateTime"));
62        // get host's last HB time (seconds)
63        int lastHeartbeat = ((HeartbeatHolder) _hostsHB.get(source)).getLastHeartbeat();
64        // time since last heartbeat (seconds)
65        int timeSinceLastHB = ((int) (System.currentTimeMillis()/1000)) - lastHeartbeat;
66        
67        // find out the threshold level we're at
68        int result = checkAttributeThreshold(timeSinceLastHB, reg);
69            
70        // decide what threshold level we're on, if we've changed, record that
71        if (result != reg.getLastThresholdLevel(0)) {
72            reg.setLastThresholdLevel(0, result);
73        }
74            
75        // as long as this isn't a normal level
76        if(reg.getLastThresholdLevel(0) != Alert.thresholdNORMAL) {
77            // if the time since the last alert is more than the time for
78            // its timeout, fire an alert, escalate the alert
79            long timeout = reg.getLastAlertTimeout(0);
80            if ((timeout > 0) && (reg.getTimeLastSent(0) > 0)) {
81                if((System.currentTimeMillis() - reg.getTimeLastSent(0)) > timeout) {
82                    int lastAlert = reg.getLastAlertLevel(0);
83                    reg.escalateAlert(0);
84                    reg.setTimeLastSent(0, System.currentTimeMillis());
85                    reg.setLastAlertTimeout(0, reg.getAlertTimeout(reg.getLastAlertLevel(0), 0));
86                    // -- SEND
87                    fireAlert(source, timeSinceLastHB, reg, lastAlert);
88                }
89            // if we don't have a timeout configured...we go STRAIGHT to the next level
90            } else {
91                int lastAlert = reg.getLastAlertLevel(0);
92                reg.escalateAlert(0);
93                reg.setTimeLastSent(0, System.currentTimeMillis());
94                reg.setLastAlertTimeout(0, reg.getAlertTimeout(reg.getLastAlertLevel(0), 0));
95                // -- SEND
96                fireAlert(source, timeSinceLastHB, reg, lastAlert);
97            }
98                
99        // we must be on ok, check the timeout value for this
100        } else {
101            // if we were on an OK alert before, then we don't do anything
102            // but if we weren't we only set OK, once the timeout of the last
103            // alert has occurred
104            if (reg.getLastAlertLevel(0) != Alert.alertOK) {
105                long timeout = reg.getLastAlertTimeout(0);
106                if ((timeout > 0) && (reg.getTimeLastSent(0) > 0)) {
107                    if ((System.currentTimeMillis() - reg.getTimeLastSent(0)) > timeout) {
108                        int lastAlert = reg.getLastAlertLevel(0);
109                        reg.setLastAlertLevel(0, Alert.alertOK);
110                        reg.setTimeLastSent(0, System.currentTimeMillis());
111                        reg.setLastAlertTimeout(0, timeout);
112                        // -- SEND
113                        fireAlert(source, timeSinceLastHB, reg, lastAlert);
114                    }
115                }
116            }
117        }
118    }
56  
57 + //---PUBLIC METHODS---
58 +    
59 +    /**
60 +     * Analyse a packet of data. In this case, this will just
61 +     * register the fact that a heartbeat has arrived.
62 +     *
63 +     * @param packet The packet of data to analyse
64 +     */
65      public void analysePacket(XMLPacket packet) {
66 <        if (packet.getParam("packet.attributes.type").equals("heartbeat")) {
67 <            String source = packet.getParam("packet.attributes.machine_name");
68 <            if (!_hostsHB.containsKey(source)) {
69 <                _hostsReg.put(source, new Register(source, _name, 1));
125 <                _hostsHB.put(source, new HeartbeatHolder());
66 >        String source = packet.getParam("packet.attributes.machine_name");
67 >        if (!_hosts.containsKey(source)) {
68 >            synchronized(this) {
69 >                _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
70              }
127            HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hostsReg.get(source);
128            lastHeartbeat.setLastHeartbeat((int)System.currentTimeMillis()/1000);
71          }
72 +        HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
73 +        lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
74      }
75      
76      /**
77       * Overrides the {@link java.lang.Object#toString() Object.toString()}
78       * method to provide clean logging (every class should have this).
79       *
80 <     * This uses the uk.ac.ukc.iscream.util.NameFormat class
80 >     * This uses the uk.org.iscream.util.NameFormat class
81       * to format the toString()
82       *
83       * @return the name of this class and its CVS revision
# Line 154 | Line 98 | public class Heartbeat__Monitor extends Thread impleme
98  
99   //---PRIVATE METHODS---
100      
101 <    private int checkAttributeThreshold(int timeSinceLastHB, Register reg) {
101 >    /**
102 >     * Checks whether the time since the last heartbeat
103 >     * is beyond the threshold(s).
104 >     *
105 >     * @param timeSinceLastHB a long time since the last heartbeat arrived
106 >     * @param reg the Register for this host
107 >     * @return the level which has been breached, if any
108 >     */
109 >    private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
110          for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
111              if (reg.getThreshold(thresholdLevel) != -1.0) {
112 <                if (reg.getThreshold(thresholdLevel) < timeSinceLastHB) {
112 >                if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) {
113                      return thresholdLevel;
114                  }
115              }
116          }
117 <        return 0;
117 >        return Alert.thresholdNORMAL;
118      }
167        
168    private void fireAlert(String source, int timeSinceLastHB, Register reg, int lastAlert) {
169        int alertLevel = reg.getLastAlertLevel(0);
170        int thresholdLevel = reg.getLastThresholdLevel(0);
171        String currentValue = String.valueOf(timeSinceLastHB);
172        String attributeName = "Heartbeat";
173        String thresholdValue = String.valueOf(reg.getThreshold(thresholdLevel));
174        String time = Long.toString(reg.getAlertTimeout(reg.getLastAlertLevel(0), 0) / 1000);
175        if (thresholdLevel == Alert.thresholdNORMAL) {
176            thresholdValue = "-";
177        }
178        if (alertLevel == Alert.alertOK) {
179            time = "0";
180        }
181        Alert alert = new Alert(alertLevel, lastAlert, thresholdLevel, source, thresholdValue, currentValue, attributeName, time);
182        _alerterQueue.add(alert);
183        _logger.write(toString(), Logger.DEBUG, "Fired alert for source:" + source + " at alert level:" + Alert.alertLevels[alertLevel] + " on:" + attributeName + " for threshold level:" + Alert.thresholdLevels[thresholdLevel] + " at:" +  currentValue + " exceeding threshold of:" +thresholdValue + " next alert sent in:" + time + "secs");
184    }
119  
120   //---ACCESSOR/MUTATOR METHODS---
121 <
121 >    
122 >    /**
123 >     * Returns a reference to the Queue we're getting data
124 >     * from. This is specific to this monitor.
125 >     *
126 >     * @return a reference to a Queue to get data from
127 >     */
128 >    protected Queue getQueue() {
129 >        return MonitorManager.getInstance().getHeartbeatQueue();
130 >    }
131 >    
132   //---ATTRIBUTES---
133  
134      /**
# Line 197 | Line 141 | public class Heartbeat__Monitor extends Thread impleme
141       * be changed to null for utility classes.
142       */
143      private String _name = "Heartbeat";
144 <
144 >    
145      /**
146 <     * This holds a reference to the
203 <     * system logger that is being used.
146 >     * A reference to the configuration proxy in use
147       */
148 <    private Logger _logger = ReferenceManager.getInstance().getLogger();
148 >    private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
149      
150 <    private Queue _alerterQueue = ClientMain._alerterQueue;
150 >    /**
151 >     * A HashMap of hosts, with associated HeartbeatHolder's.
152 >     */
153 >    private HashMap _hosts = new HashMap();
154      
155      /**
156 <     * A reference to the configuration proxy in use
156 >     * A reference to the system logger.
157       */
158 <    private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
158 >    private Logger _logger = ReferenceManager.getInstance().getLogger();
159  
214    private HashMap _hostsHB = new HashMap();
215    private HashMap _hostsReg = new HashMap();
216
160   //---STATIC ATTRIBUTES---
161  
162   //---INNER CLASSES---
163 <
163 >    
164 >    /**
165 >     * This inner class simply holds some information
166 >     * about a specific host.
167 >     */
168      private class HeartbeatHolder {
169          
170 <        public void setLastHeartbeat(int lastHeartbeat) {
170 >        /**
171 >         * Construct a new HeartbeatHolder.
172 >         */
173 >        public HeartbeatHolder(Register register) {
174 >            _register = register;
175 >        }
176 >        
177 >        /**
178 >         * Set the time of the last heartbeat
179 >         */
180 >        public void setLastHeartbeat(long lastHeartbeat) {
181              _lastHeartbeat = lastHeartbeat;
182          }
183          
184 <        public int getLastHeartbeat() {
184 >        /**
185 >         * Get the time of the last heartbeat
186 >         */
187 >        public long getLastHeartbeat() {
188              return _lastHeartbeat;
189          }
190          
191 <        private int _lastHeartbeat;
192 <    }  
193 <
191 >        /**
192 >         * Get the Register
193 >         */
194 >        public Register getRegister() {
195 >            return _register;
196 >        }
197 >        
198 >        /**
199 >         * last heartbeat time
200 >         */
201 >        private long _lastHeartbeat;
202 >        
203 >        /**
204 >         * register ref
205 >         */
206 >        private Register _register;
207 >    }
208 >    
209 >    /**
210 >     * This worker thread just checks all the hosts and then
211 >     * waits a period of time before doing it again. It sends
212 >     * Alerts as required.
213 >     */
214 >    private class HeartbeatWorker extends Thread {
215 >        
216 >        /**
217 >         * The main run method of this worker thread. It simply
218 >         * checks through all the hosts it has stored, running
219 >         * the analyseHB method on each. It then removes any
220 >         * that have passed a FINAL, and waits a (configured)
221 >         * length of time before doing it again.
222 >         */
223 >        public void run() {
224 >            ConfigurationProxy cp = ConfigurationProxy.getInstance();
225 >            while(true) {
226 >                // this cycle period of this monitor's checks
227 >                int checkPeriod = 0;
228 >                try {
229 >                    checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
230 >                } catch (PropertyNotFoundException e) {
231 >                    checkPeriod = DEFAULT_CHECK_PERIOD;
232 >                    _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
233 >                } catch (NumberFormatException e) {
234 >                    checkPeriod = DEFAULT_CHECK_PERIOD;
235 >                    _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
236 >                }
237 >                
238 >                synchronized(Heartbeat__Monitor.this) {
239 >                    // perform the checks on every host currently held in the _hosts map
240 >                    Iterator i = _hosts.keySet().iterator();
241 >                    while(i.hasNext()) {
242 >                        // get host
243 >                        String source = (String) i.next();
244 >                        // check it
245 >                        boolean remove = analyseHB(source);
246 >                        // remove it if it's passed a FINAL
247 >                        if(remove) {
248 >                            i.remove();
249 >                        }
250 >                    }
251 >                }
252 >                
253 >                // wait a while
254 >                try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
255 >            }
256 >        }
257 >        
258 >        /**
259 >         * Analyses a given host's state, and if need be generates
260 >         * a relevant Alert. Note that it also checks if the last
261 >         * alert sent is FINAL, in which case it returns true to
262 >         * indicate removal of this host.
263 >         *
264 >         * @param source the host to check
265 >         * @return whether this host can be deleted
266 >         */
267 >        private boolean analyseHB(String source) {
268 >            ConfigurationProxy cp = ConfigurationProxy.getInstance();
269 >            HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
270 >            Register reg = hbHolder.getRegister();
271 >            
272 >            // get host's HB interval (seconds)
273 >            // this should always exist, thus we set to 0
274 >            int hostHBinterval = 0;
275 >            try {
276 >                hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
277 >            } catch (PropertyNotFoundException e) {
278 >                hostHBinterval = 0;
279 >                _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
280 >            } catch (NumberFormatException e) {
281 >                hostHBinterval = 0;
282 >                _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
283 >            }
284 >            
285 >            // get host's last HB time (seconds)
286 >            long lastHeartbeat = hbHolder.getLastHeartbeat();
287 >            // time since last heartbeat (seconds)
288 >            long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
289 >            // time since (or until if negative) the expected heartbeat
290 >            long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
291 >            
292 >            // best do a check in case the expected heartbeat is in the future
293 >            if(timeSinceExpectedHB < 0) {
294 >                timeSinceExpectedHB = 0;
295 >            }
296 >            
297 >            // find out the threshold level we're at
298 >            int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
299 >            
300 >            // process the alert
301 >            Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
302 >            
303 >            if(reg.getLastAlertLevel() == Alert.alertFINAL) {
304 >                return true;
305 >            }
306 >            return false;
307 >        }
308 >    }
309   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines