ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java
(Generate patch)

Comparing projects/cms/source/server/uk/org/iscream/cms/server/client/monitors/Heartbeat__Monitor.java (file contents):
Revision 1.6 by tdb, Tue Mar 6 20:26:01 2001 UTC vs.
Revision 1.21 by tdb, Mon Nov 26 12:56:33 2001 UTC

# Line 1 | Line 1
1   //---PACKAGE DECLARATION---
2 < package uk.ac.ukc.iscream.client.monitors;
2 > package uk.org.iscream.cms.server.client.monitors;
3  
4   //---IMPORTS---
5   import java.util.HashMap;
6   import java.util.Iterator;
7 < import uk.ac.ukc.iscream.client.*;
8 < import uk.ac.ukc.iscream.core.*;
9 < import uk.ac.ukc.iscream.util.*;
10 < import uk.ac.ukc.iscream.componentmanager.*;
7 > import java.util.StringTokenizer;
8 > import uk.org.iscream.cms.server.client.*;
9 > import uk.org.iscream.cms.server.core.*;
10 > import uk.org.iscream.cms.server.util.*;
11 > import uk.org.iscream.cms.server.componentmanager.*;
12  
13   /**
14 < * This Monitor watches heartbeats
14 > * This Monitor watches heartbeats.
15 > * It generates an alert when a heartbeat that is expected
16 > * does not arrive. Unlike all the other monitors, this one
17 > * is driven by an event *not* occurring, rather than an
18 > * event occurring. This means it must be actively checking
19 > * for missing heartbeats, and thus has an extra inner class
20 > * thread.
21   *
22   * @author  $Author$
23   * @version $Id$
24   */
25 < public class Heartbeat__Monitor implements PluginMonitor, Runnable {
25 > public class Heartbeat__Monitor extends MonitorSkeleton {
26  
27   //---FINAL ATTRIBUTES---
28  
# Line 24 | Line 31 | public class Heartbeat__Monitor implements PluginMonit
31       */
32      public final String REVISION = "$Revision$";
33      
34 +    /**
35 +     * A description of this monitor
36 +     */
37      public final String DESC = "Monitors Heartbeats.";
38      
39 +    /**
40 +     * The default (used if not configured) period at
41 +     * which to check for old heartbeats. (in seconds)
42 +     */
43      public final int DEFAULT_CHECK_PERIOD = 60;
44      
45   //---STATIC METHODS---
46  
47   //---CONSTRUCTORS---
48 <
48 >    
49 >    /**
50 >     * Constructs a new Heartbeat monitor, and starts off
51 >     * the worker thread.
52 >     */
53      public Heartbeat__Monitor() {
54 <        new Thread(this).start();
54 >        super();
55 >        createInitialHosts();
56 >        new HeartbeatWorker().start();
57      }
58  
59   //---PUBLIC METHODS---
60      
61 <    public void run() {
62 <        ConfigurationProxy cp = ConfigurationProxy.getInstance();
63 <        while(true) {
64 <            // this cycle period of this monitor's checks
65 <            int checkPeriod = 0;
66 <            try {
47 <                checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
48 <            } catch (PropertyNotFoundException e) {
49 <                checkPeriod = DEFAULT_CHECK_PERIOD;
50 <                _logger.write(toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
51 <            } catch (NumberFormatException e) {
52 <                checkPeriod = DEFAULT_CHECK_PERIOD;
53 <                _logger.write(toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
54 <            }
55 <            
56 <            // perform the checks (use HB hash, although they *should* be the same)
57 <            Iterator i = _hosts.keySet().iterator();
58 <            while(i.hasNext()) {
59 <                // get host
60 <                String source = (String) i.next();
61 <                // check it
62 <                boolean remove = analyseHB(source);
63 <                if(remove) {
64 <                    i.remove();
65 <                }
66 <            }
67 <            
68 <            // wait a while
69 <            try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
70 <        }
71 <    }
72 <    
73 <    // only use attribute num 0 :)
74 <    public boolean analyseHB(String source) {
75 <        ConfigurationProxy cp = ConfigurationProxy.getInstance();
76 <        HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
77 <        Register reg = (Register) ((HashMap) hbHolder.getRegisterHash()).get(source);
78 <        
79 <        // get host's HB interval (seconds)
80 <        // this should always exist, thus we set to 0
81 <        int hostHBinterval = 0;
82 <        try {
83 <            hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
84 <        } catch (PropertyNotFoundException e) {
85 <            hostHBinterval = 0;
86 <            _logger.write(toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
87 <        } catch (NumberFormatException e) {
88 <            hostHBinterval = 0;
89 <            _logger.write(toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
90 <        }
91 <
92 <        // get host's last HB time (seconds)
93 <        long lastHeartbeat = hbHolder.getLastHeartbeat();
94 <        // time since last heartbeat (seconds)
95 <        long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
96 <        
97 <        // find out the threshold level we're at
98 <        int result = checkAttributeThreshold(timeSinceLastHB, reg);
99 <            
100 <        // decide what threshold level we're on, if we've changed, record that
101 <        if (result != reg.getLastThresholdLevel(0)) {
102 <            reg.setLastThresholdLevel(0, result);
103 <        }
104 <            
105 <        // as long as this isn't a normal level
106 <        if(reg.getLastThresholdLevel(0) != Alert.thresholdNORMAL) {
107 <            // if the time since the last alert is more than the time for
108 <            // its timeout, fire an alert, escalate the alert
109 <            long timeout = reg.getLastAlertTimeout(0);
110 <            if ((timeout > 0) && (reg.getTimeLastSent(0) > 0)) {
111 <                if((System.currentTimeMillis() - reg.getTimeLastSent(0)) > timeout) {
112 <                    int lastAlert = reg.getLastAlertLevel(0);
113 <                    reg.escalateAlert(0);
114 <                    reg.setTimeLastSent(0, System.currentTimeMillis());
115 <                    reg.setLastAlertTimeout(0, reg.getAlertTimeout(reg.getLastAlertLevel(0), 0));
116 <                    // -- SEND
117 <                    fireAlert(source, timeSinceLastHB, reg, lastAlert);
118 <                }
119 <            // if we don't have a timeout configured...we got STRAIGHT to the next level
120 <            } else {
121 <                int lastAlert = reg.getLastAlertLevel(0);
122 <                reg.escalateAlert(0);
123 <                reg.setTimeLastSent(0, System.currentTimeMillis());
124 <                reg.setLastAlertTimeout(0, reg.getAlertTimeout(reg.getLastAlertLevel(0), 0));
125 <                // -- SEND
126 <                fireAlert(source, timeSinceLastHB, reg, lastAlert);
127 <            }
128 <                
129 <        // we must be on ok, check the timeout value for this
130 <        } else {
131 <            // if we were on an OK alert before, then we don't do anything
132 <            // but if we weren't we only set OK, once the timeout of the last
133 <            // alert has occourd
134 <            if (reg.getLastAlertLevel(0) != Alert.alertOK) {
135 <                long timeout = reg.getLastAlertTimeout(0);
136 <                if ((timeout > 0) && (reg.getTimeLastSent(0) > 0)) {
137 <                    if ((System.currentTimeMillis() - reg.getTimeLastSent(0)) > timeout) {
138 <                        int lastAlert = reg.getLastAlertLevel(0);
139 <                        reg.setLastAlertLevel(0, Alert.alertOK);
140 <                        reg.setTimeLastSent(0, System.currentTimeMillis());
141 <                        reg.setLastAlertTimeout(0, timeout);
142 <                        // -- SEND
143 <                        fireAlert(source, timeSinceLastHB, reg, lastAlert);
144 <                    }
145 <                }
146 <            }
147 <        }
148 <        if(reg.getLastAlertLevel(0) == Alert.alertFINAL) {
149 <            return true;
150 <        }
151 <        return false;
152 <    }
153 <
61 >    /**
62 >     * Analyse a packet of data. In this case, this will just
63 >     * register the fact that a heartbeat has arrived.
64 >     *
65 >     * @param packet The packet of data to analyse
66 >     */
67      public void analysePacket(XMLPacket packet) {
68 <        if (packet.getParam("packet.attributes.type").equals("heartbeat")) {
69 <            String source = packet.getParam("packet.attributes.machine_name");
70 <            if (!_hosts.containsKey(source)) {
71 <                HashMap registerHash = new HashMap();
159 <                registerHash.put(source, new Register(source, _name, 1));
160 <                _hosts.put(source, new HeartbeatHolder(registerHash));
68 >        String source = packet.getParam("packet.attributes.machine_name");
69 >        if (!_hosts.containsKey(source)) {
70 >            synchronized(this) {
71 >                _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
72              }
162            HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
163            lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
73          }
74 +        HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
75 +        lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
76      }
77      
78      /**
79       * Overrides the {@link java.lang.Object#toString() Object.toString()}
80       * method to provide clean logging (every class should have this).
81       *
82 <     * This uses the uk.ac.ukc.iscream.util.NameFormat class
82 >     * This uses the uk.org.iscream.cms.server.util.NameFormat class
83       * to format the toString()
84       *
85       * @return the name of this class and its CVS revision
# Line 189 | Line 100 | public class Heartbeat__Monitor implements PluginMonit
100  
101   //---PRIVATE METHODS---
102      
103 +    /**
104 +     * Checks whether the time since the last heartbeat
105 +     * is beyond the threshold(s).
106 +     *
107 +     * @param timeSinceLastHB a long time since the last heartbeat arrived
108 +     * @param reg the Register for this host
109 +     * @return the level which has been breached, if any
110 +     */
111      private int checkAttributeThreshold(long timeSinceLastHB, Register reg) {
112          for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) {
113              if (reg.getThreshold(thresholdLevel) != -1.0) {
# Line 197 | Line 116 | public class Heartbeat__Monitor implements PluginMonit
116                  }
117              }
118          }
119 <        return 0;
119 >        return Alert.thresholdNORMAL;
120      }
121 +    
122 +    /**
123 +     * Gets an initial list of hosts from the config
124 +     * and adds a fake set of heartbeats for them.
125 +     * If the hosts don't respond within the timeout
126 +     * period an alert will be raised.
127 +     *
128 +     * The effect of this is to allow us to know about
129 +     * hosts which weren't on when we started up, and
130 +     * will thus never have generated a heartbeat - yet
131 +     * we will still want to know they're not responding.
132 +     */
133 +    private void createInitialHosts() {
134 +        // get the initial list of hosts from the config
135 +        String initialHosts = "";
136 +        try {
137 +            initialHosts = _cp.getProperty(_name, "Monitor.Heartbeat.initialHosts");
138 +        } catch (PropertyNotFoundException e) {
139 +            // just leave initialHosts empty
140 +            _logger.write(Heartbeat__Monitor.this.toString(), Logger.DEBUG, "No initial list of hosts set, defaulting to none.");
141 +        }
142          
143 <    private void fireAlert(String source, long timeSinceLastHB, Register reg, int lastAlert) {
144 <        int alertLevel = reg.getLastAlertLevel(0);
145 <        int thresholdLevel = reg.getLastThresholdLevel(0);
146 <        String currentValue = String.valueOf(timeSinceLastHB);
147 <        String attributeName = "Heartbeat";
148 <        String thresholdValue = String.valueOf(reg.getThreshold(thresholdLevel));
149 <        String timeout = Long.toString(reg.getAlertTimeout(reg.getLastAlertLevel(0), 0) / 1000);
150 <        if (thresholdLevel == Alert.thresholdNORMAL) {
151 <            thresholdValue = "-";
143 >        // parse through the initial hosts adding them
144 >        StringTokenizer st = new StringTokenizer(initialHosts, ";");
145 >        while (st.hasMoreTokens()) {
146 >            String source = st.nextToken();
147 >            // check if they already exist, don't want to add them twice
148 >            if (!_hosts.containsKey(source)) {
149 >                synchronized(this) {
150 >                    _hosts.put(source, new HeartbeatHolder(new Register(source, _name)));
151 >                }
152 >            }
153 >            HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source);
154 >            // set a "fake" heartbeat
155 >            lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000);
156          }
213        if (alertLevel == Alert.alertOK) {
214            timeout = "0";
215        }
216        Alert alert = new Alert(alertLevel, lastAlert, thresholdLevel, source, thresholdValue, currentValue, attributeName, timeout, reg.getInitialAlertTime(0));
217        _alerterQueue.add(alert);
218        _logger.write(toString(), Logger.DEBUG, "Fired alert for source:" + source + " at alert level:" + Alert.alertLevels[alertLevel] + " on:" + attributeName + " for threshold level:" + Alert.thresholdLevels[thresholdLevel] + " at:" +  currentValue + " exceeding threshold of:" +thresholdValue + " next alert sent in:" + timeout + "secs");
157      }
158  
159   //---ACCESSOR/MUTATOR METHODS---
160 <
160 >    
161 >    /**
162 >     * Returns a reference to the Queue we're getting data
163 >     * from. This is specific to this monitor.
164 >     *
165 >     * @return a reference to a Queue to get data from
166 >     */
167 >    protected Queue getQueue() {
168 >        return MonitorManager.getInstance().getHeartbeatQueue();
169 >    }
170 >    
171   //---ATTRIBUTES---
172  
173      /**
# Line 232 | Line 180 | public class Heartbeat__Monitor implements PluginMonit
180       * be changed to null for utility classes.
181       */
182      private String _name = "Heartbeat";
183 <
183 >    
184      /**
185 <     * This holds a reference to the
238 <     * system logger that is being used.
185 >     * A reference to the configuration proxy in use
186       */
187 <    private Logger _logger = ReferenceManager.getInstance().getLogger();
187 >    private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
188      
189 <    private Queue _alerterQueue = ClientMain._alerterQueue;
189 >    /**
190 >     * A HashMap of hosts, with associated HeartbeatHolders.
191 >     */
192 >    private HashMap _hosts = new HashMap();
193      
194      /**
195 <     * A reference to the configuration proxy in use
195 >     * A reference to the system logger.
196       */
197 <    private ConfigurationProxy _cp = ConfigurationProxy.getInstance();
197 >    private Logger _logger = ReferenceManager.getInstance().getLogger();
198  
249    private HashMap _hosts = new HashMap();
250
199   //---STATIC ATTRIBUTES---
200  
201   //---INNER CLASSES---
202 <
202 >    
203 >    /**
204 >     * This inner class simply holds some information
205 >     * about a specific host.
206 >     */
207      private class HeartbeatHolder {
208          
209 <        public HeartbeatHolder(HashMap registerHash) {
210 <            _registerHash = registerHash;
209 >        /**
210 >         * Construct a new HeartbeatHolder.
211 >         */
212 >        public HeartbeatHolder(Register register) {
213 >            _register = register;
214          }
215          
216 +        /**
217 +         * Set the time of the last heartbeat
218 +         */
219          public void setLastHeartbeat(long lastHeartbeat) {
220              _lastHeartbeat = lastHeartbeat;
221          }
222          
223 +        /**
224 +         * Get the time of the last heartbeat
225 +         */
226          public long getLastHeartbeat() {
227              return _lastHeartbeat;
228          }
229          
230 <        public HashMap getRegisterHash() {
231 <            return _registerHash;
230 >        /**
231 >         * Get the Register
232 >         */
233 >        public Register getRegister() {
234 >            return _register;
235          }
236          
237 +        /**
238 +         * last heartbeat time
239 +         */
240          private long _lastHeartbeat;
241 <        private HashMap _registerHash;
242 <    }  
243 <
241 >        
242 >        /**
243 >         * register ref
244 >         */
245 >        private Register _register;
246 >    }
247 >    
248 >    /**
249 >     * This worker thread just checks all the hosts and then
250 >     * waits a period of time before doing it again. It sends
251 >     * Alerts as required.
252 >     */
253 >    private class HeartbeatWorker extends Thread {
254 >        
255 >        /**
256 >         * The main run method of this worker thread. It simply
257 >         * checks through all the hosts it has stored, running
258 >         * the analyseHB method on each. It then removes any
259 >         * that have passed a FINAL, and waits a (configured)
260 >         * length of time before doing it again.
261 >         */
262 >        public void run() {
263 >            ConfigurationProxy cp = ConfigurationProxy.getInstance();
264 >            while(true) {
265 >                // the cycle period of this monitor's checks
266 >                int checkPeriod = 0;
267 >                try {
268 >                    checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod"));
269 >                } catch (PropertyNotFoundException e) {
270 >                    checkPeriod = DEFAULT_CHECK_PERIOD;
271 >                    _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds");
272 >                } catch (NumberFormatException e) {
273 >                    checkPeriod = DEFAULT_CHECK_PERIOD;
274 >                    _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds");
275 >                }
276 >                
277 >                synchronized(Heartbeat__Monitor.this) {
278 >                    // perform the checks (use HB hash, although they *should* be the same)
279 >                    Iterator i = _hosts.keySet().iterator();
280 >                    while(i.hasNext()) {
281 >                        // get host
282 >                        String source = (String) i.next();
283 >                        // check it
284 >                        boolean remove = analyseHB(source);
285 >                        // remove it if it's passed a FINAL
286 >                        if(remove) {
287 >                            i.remove();
288 >                        }
289 >                    }
290 >                }
291 >                
292 >                // wait a while
293 >                try {Thread.sleep(checkPeriod * 1000);} catch (InterruptedException e) {}
294 >            }
295 >        }
296 >        
297 >        /**
298 >         * Analyses a given host's state, and if need be generates
299 >         * a relevant Alert. Note that it also checks if the last
300 >         * alert sent is FINAL, in which case it returns true to
301 >         * indicate removal of this host.
302 >         *
303 >         * @param source the host to check
304 >         * @return whether this host can be deleted
305 >         */
306 >        private boolean analyseHB(String source) {
307 >            ConfigurationProxy cp = ConfigurationProxy.getInstance();
308 >            HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source);
309 >            Register reg = hbHolder.getRegister();
310 >            
311 >            // get host's HB interval (seconds)
312 >            // this should always exist, thus we set to 0
313 >            int hostHBinterval = 0;
314 >            try {
315 >                hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime"));
316 >            } catch (PropertyNotFoundException e) {
317 >                hostHBinterval = 0;
318 >                _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds");
319 >            } catch (NumberFormatException e) {
320 >                hostHBinterval = 0;
321 >                _logger.write(Heartbeat__Monitor.this.toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds");
322 >            }
323 >            
324 >            // get host's last HB time (seconds)
325 >            long lastHeartbeat = hbHolder.getLastHeartbeat();
326 >            // time since last heartbeat (seconds)
327 >            long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat;
328 >            // time since (or until if negative) the expected heartbeat
329 >            long timeSinceExpectedHB = timeSinceLastHB - (long) hostHBinterval;
330 >            
331 >            // best do a check in case the expected heartbeat is in the future
332 >            if(timeSinceExpectedHB < 0) {
333 >                timeSinceExpectedHB = 0;
334 >            }
335 >            
336 >            // find out the threshold level we're at
337 >            int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg);
338 >            
339 >            // process the alert
340 >            Heartbeat__Monitor.this.processAlert(newThreshold, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB));
341 >            
342 >            if(reg.getLastAlertLevel() == Alert.alertFINAL) {
343 >                return true;
344 >            }
345 >            return false;
346 >        }
347 >    }
348   }

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines