15 |
|
* @author $Author$ |
16 |
|
* @version $Id$ |
17 |
|
*/ |
18 |
< |
public class Heartbeat__Monitor extends Thread implements PluginMonitor { |
18 |
> |
public class Heartbeat__Monitor extends MonitorSkeleton implements Runnable { |
19 |
|
|
20 |
|
//---FINAL ATTRIBUTES--- |
21 |
|
|
26 |
|
|
27 |
|
public final String DESC = "Monitors Heartbeats."; |
28 |
|
|
29 |
+ |
public final int DEFAULT_CHECK_PERIOD = 60; |
30 |
+ |
|
31 |
|
//---STATIC METHODS--- |
32 |
|
|
33 |
|
//---CONSTRUCTORS--- |
34 |
|
|
35 |
+ |
/**
 * Constructs the monitor and immediately starts a new thread
 * which executes this object's run() method.
 */
public Heartbeat__Monitor() {
    Thread monitorThread = new Thread(this);
    monitorThread.start();
}
38 |
+ |
|
39 |
|
//---PUBLIC METHODS--- |
40 |
|
|
41 |
|
public void run() { |
42 |
|
ConfigurationProxy cp = ConfigurationProxy.getInstance(); |
43 |
|
while(true) { |
44 |
< |
// this cycle period could be done better, maybe ? |
45 |
< |
int checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod")); |
44 |
> |
// the cycle period of this monitor's checks |
45 |
> |
int checkPeriod = 0; |
46 |
> |
try { |
47 |
> |
checkPeriod = Integer.parseInt(cp.getProperty(_name, "Monitor.Heartbeat.checkPeriod")); |
48 |
> |
} catch (PropertyNotFoundException e) { |
49 |
> |
checkPeriod = DEFAULT_CHECK_PERIOD; |
50 |
> |
_logger.write(toString(), Logger.WARNING, "Monitor.Heartbeat.checkPeriod value unavailable using default of " + checkPeriod + " seconds"); |
51 |
> |
} catch (NumberFormatException e) { |
52 |
> |
checkPeriod = DEFAULT_CHECK_PERIOD; |
53 |
> |
_logger.write(toString(), Logger.WARNING, "Erronous Monitor.Heartbeat.checkPeriod value in configuration using default of " + checkPeriod + " seconds"); |
54 |
> |
} |
55 |
|
|
56 |
< |
// perform the checks (use HB hash, although they *should* be the same) |
57 |
< |
Iterator i = _hostsHB.keySet().iterator(); |
58 |
< |
while(i.hasNext()) { |
59 |
< |
// get host |
60 |
< |
String source = (String) i.next(); |
61 |
< |
// check it |
62 |
< |
analyseHB(source); |
56 |
> |
synchronized(this) { |
57 |
> |
// perform the checks (use HB hash, although they *should* be the same) |
58 |
> |
Iterator i = _hosts.keySet().iterator(); |
59 |
> |
while(i.hasNext()) { |
60 |
> |
// get host |
61 |
> |
String source = (String) i.next(); |
62 |
> |
// check it |
63 |
> |
boolean remove = analyseHB(source); |
64 |
> |
if(remove) { |
65 |
> |
i.remove(); |
66 |
> |
} |
67 |
> |
} |
68 |
|
} |
69 |
|
|
70 |
|
// wait a while |
72 |
|
} |
73 |
|
} |
74 |
|
|
75 |
< |
// only use attribute num 0 :) |
56 |
< |
public void analyseHB(String source) { |
57 |
< |
ConfigurationProxy cp = ConfigurationProxy.getInstance(); |
58 |
< |
Register reg = (Register) _hostsReg.get(source); |
59 |
< |
|
60 |
< |
// get host's HB interval (seconds) |
61 |
< |
int hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "TCPUpdateTime")); |
62 |
< |
// get host's last HB time (seconds) |
63 |
< |
int lastHeartbeat = ((HeartbeatHolder) _hostsHB.get(source)).getLastHeartbeat(); |
64 |
< |
// time since last heartbeat (seconds) |
65 |
< |
int timeSinceLastHB = ((int) (System.currentTimeMillis()/1000)) - lastHeartbeat; |
66 |
< |
|
67 |
< |
// find out the threshold level we're at |
68 |
< |
int result = checkAttributeThreshold(timeSinceLastHB, reg); |
69 |
< |
|
70 |
< |
// decide what threshold level we're on, if we've changed, record that |
71 |
< |
if (result != reg.getLastThresholdLevel(0)) { |
72 |
< |
reg.setLastThresholdLevel(0, result); |
73 |
< |
} |
74 |
< |
|
75 |
< |
// as long as this isn't a normal level |
76 |
< |
if(reg.getLastThresholdLevel(0) != Alert.thresholdNORMAL) { |
77 |
< |
// if the time since the last alert is more than the time for |
78 |
< |
// its timeout, fire an alert, escalate the alert |
79 |
< |
long timeout = reg.getLastAlertTimeout(0); |
80 |
< |
if ((timeout > 0) && (reg.getTimeLastSent(0) > 0)) { |
81 |
< |
if((System.currentTimeMillis() - reg.getTimeLastSent(0)) > timeout) { |
82 |
< |
int lastAlert = reg.getLastAlertLevel(0); |
83 |
< |
reg.escalateAlert(0); |
84 |
< |
reg.setTimeLastSent(0, System.currentTimeMillis()); |
85 |
< |
reg.setLastAlertTimeout(0, reg.getAlertTimeout(reg.getLastAlertLevel(0), 0)); |
86 |
< |
// -- SEND |
87 |
< |
fireAlert(source, timeSinceLastHB, reg, lastAlert); |
88 |
< |
} |
89 |
< |
// if we don't have a timeout configured...we got STRAIGHT to the next level |
90 |
< |
} else { |
91 |
< |
int lastAlert = reg.getLastAlertLevel(0); |
92 |
< |
reg.escalateAlert(0); |
93 |
< |
reg.setTimeLastSent(0, System.currentTimeMillis()); |
94 |
< |
reg.setLastAlertTimeout(0, reg.getAlertTimeout(reg.getLastAlertLevel(0), 0)); |
95 |
< |
// -- SEND |
96 |
< |
fireAlert(source, timeSinceLastHB, reg, lastAlert); |
97 |
< |
} |
98 |
< |
|
99 |
< |
// we must be on ok, check the timeout value for this |
100 |
< |
} else { |
101 |
< |
// if we were on an OK alert before, then we don't do anything |
102 |
< |
// but if we weren't we only set OK, once the timeout of the last |
103 |
< |
// alert has occourd |
104 |
< |
if (reg.getLastAlertLevel(0) != Alert.alertOK) { |
105 |
< |
long timeout = reg.getLastAlertTimeout(0); |
106 |
< |
if ((timeout > 0) && (reg.getTimeLastSent(0) > 0)) { |
107 |
< |
if ((System.currentTimeMillis() - reg.getTimeLastSent(0)) > timeout) { |
108 |
< |
int lastAlert = reg.getLastAlertLevel(0); |
109 |
< |
reg.setLastAlertLevel(0, Alert.alertOK); |
110 |
< |
reg.setTimeLastSent(0, System.currentTimeMillis()); |
111 |
< |
reg.setLastAlertTimeout(0, timeout); |
112 |
< |
// -- SEND |
113 |
< |
fireAlert(source, timeSinceLastHB, reg, lastAlert); |
114 |
< |
} |
115 |
< |
} |
116 |
< |
} |
117 |
< |
} |
118 |
< |
} |
119 |
< |
|
120 |
< |
public void analysePacket(XMLPacket packet) { |
75 |
> |
public synchronized void analysePacket(XMLPacket packet) { |
76 |
|
if (packet.getParam("packet.attributes.type").equals("heartbeat")) { |
77 |
|
String source = packet.getParam("packet.attributes.machine_name"); |
78 |
< |
if (!_hostsHB.containsKey(source)) { |
79 |
< |
_hostsReg.put(source, new Register(source, _name, 1)); |
80 |
< |
_hostsHB.put(source, new HeartbeatHolder()); |
78 |
> |
if (!_hosts.containsKey(source)) { |
79 |
> |
HashMap registerHash = new HashMap(); |
80 |
> |
registerHash.put(source, new Register(source, _name, 1)); |
81 |
> |
_hosts.put(source, new HeartbeatHolder(registerHash)); |
82 |
|
} |
83 |
< |
HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hostsReg.get(source); |
84 |
< |
lastHeartbeat.setLastHeartbeat((int)System.currentTimeMillis()/1000); |
83 |
> |
HeartbeatHolder lastHeartbeat = (HeartbeatHolder) _hosts.get(source); |
84 |
> |
lastHeartbeat.setLastHeartbeat(System.currentTimeMillis()/1000); |
85 |
|
} |
86 |
|
} |
87 |
|
|
109 |
|
} |
110 |
|
|
111 |
|
//---PRIVATE METHODS--- |
112 |
+ |
|
113 |
+ |
private boolean analyseHB(String source) { |
114 |
+ |
ConfigurationProxy cp = ConfigurationProxy.getInstance(); |
115 |
+ |
HeartbeatHolder hbHolder = (HeartbeatHolder) _hosts.get(source); |
116 |
+ |
Register reg = (Register) ((HashMap) hbHolder.getRegisterHash()).get(source); |
117 |
+ |
|
118 |
+ |
// get host's HB interval (seconds) |
119 |
+ |
// this should always exist, thus we set to 0 |
120 |
+ |
int hostHBinterval = 0; |
121 |
+ |
try { |
122 |
+ |
hostHBinterval = Integer.parseInt(cp.getProperty("Host."+source, "Host.TCPUpdateTime")); |
123 |
+ |
} catch (PropertyNotFoundException e) { |
124 |
+ |
hostHBinterval = 0; |
125 |
+ |
_logger.write(toString(), Logger.WARNING, "TCPUpdateTime value unavailable using default of " + hostHBinterval + " seconds"); |
126 |
+ |
} catch (NumberFormatException e) { |
127 |
+ |
hostHBinterval = 0; |
128 |
+ |
_logger.write(toString(), Logger.WARNING, "Erronous TCPUpdateTime value in configuration using default of " + hostHBinterval + " seconds"); |
129 |
+ |
} |
130 |
+ |
|
131 |
+ |
// get host's last HB time (seconds) |
132 |
+ |
long lastHeartbeat = hbHolder.getLastHeartbeat(); |
133 |
+ |
// time since last heartbeat (seconds) |
134 |
+ |
long timeSinceLastHB = (System.currentTimeMillis()/1000) - lastHeartbeat; |
135 |
+ |
// time since (or until if negative) the expected heartbeat |
136 |
+ |
long timeSinceExpectedHB = timeSinceLastHB + (long) hostHBinterval; |
137 |
+ |
|
138 |
+ |
// best do a check in case the expected heartbeat is in the future |
139 |
+ |
if(timeSinceExpectedHB < 0) { |
140 |
+ |
timeSinceExpectedHB = 0; |
141 |
+ |
} |
142 |
+ |
|
143 |
+ |
// find out the threshold level we're at |
144 |
+ |
int newThreshold = checkAttributeThreshold(timeSinceExpectedHB, reg); |
145 |
+ |
|
146 |
+ |
// process the alert |
147 |
+ |
processAlert(newThreshold, 0, "Heartbeat", reg, source, String.valueOf(timeSinceExpectedHB)); |
148 |
+ |
|
149 |
+ |
if(reg.getLastAlertLevel(0) == Alert.alertFINAL) { |
150 |
+ |
return true; |
151 |
+ |
} |
152 |
+ |
return false; |
153 |
+ |
} |
154 |
|
|
155 |
< |
private int checkAttributeThreshold(int timeSinceLastHB, Register reg) { |
155 |
> |
private int checkAttributeThreshold(long timeSinceLastHB, Register reg) { |
156 |
|
for(int thresholdLevel = Alert.thresholdLevels.length - 1; thresholdLevel >= 0; thresholdLevel--) { |
157 |
|
if (reg.getThreshold(thresholdLevel) != -1.0) { |
158 |
< |
if (reg.getThreshold(thresholdLevel) < timeSinceLastHB) { |
158 |
> |
if (((long) reg.getThreshold(thresholdLevel)) < timeSinceLastHB) { |
159 |
|
return thresholdLevel; |
160 |
|
} |
161 |
|
} |
162 |
|
} |
163 |
< |
return 0; |
163 |
> |
return Alert.thresholdNORMAL; |
164 |
|
} |
167 |
– |
|
168 |
– |
private void fireAlert(String source, int timeSinceLastHB, Register reg, int lastAlert) { |
169 |
– |
int alertLevel = reg.getLastAlertLevel(0); |
170 |
– |
int thresholdLevel = reg.getLastThresholdLevel(0); |
171 |
– |
String currentValue = String.valueOf(timeSinceLastHB); |
172 |
– |
String attributeName = "Heartbeat"; |
173 |
– |
String thresholdValue = String.valueOf(reg.getThreshold(thresholdLevel)); |
174 |
– |
String time = Long.toString(reg.getAlertTimeout(reg.getLastAlertLevel(0), 0) / 1000); |
175 |
– |
if (thresholdLevel == Alert.thresholdNORMAL) { |
176 |
– |
thresholdValue = "-"; |
177 |
– |
} |
178 |
– |
if (alertLevel == Alert.alertOK) { |
179 |
– |
time = "0"; |
180 |
– |
} |
181 |
– |
Alert alert = new Alert(alertLevel, lastAlert, thresholdLevel, source, thresholdValue, currentValue, attributeName, time); |
182 |
– |
_alerterQueue.add(alert); |
183 |
– |
_logger.write(toString(), Logger.DEBUG, "Fired alert for source:" + source + " at alert level:" + Alert.alertLevels[alertLevel] + " on:" + attributeName + " for threshold level:" + Alert.thresholdLevels[thresholdLevel] + " at:" + currentValue + " exceeding threshold of:" +thresholdValue + " next alert sent in:" + time + "secs"); |
184 |
– |
} |
165 |
|
|
166 |
|
//---ACCESSOR/MUTATOR METHODS--- |
167 |
|
|
177 |
|
* be changed to null for utility classes. |
178 |
|
*/ |
179 |
|
private String _name = "Heartbeat"; |
200 |
– |
|
201 |
– |
/** |
202 |
– |
* This holds a reference to the |
203 |
– |
* system logger that is being used. |
204 |
– |
*/ |
205 |
– |
private Logger _logger = ReferenceManager.getInstance().getLogger(); |
180 |
|
|
207 |
– |
private Queue _alerterQueue = ClientMain._alerterQueue; |
208 |
– |
|
181 |
|
/** |
182 |
|
* A reference to the configuration proxy in use |
183 |
|
*/ |
184 |
|
private ConfigurationProxy _cp = ConfigurationProxy.getInstance(); |
185 |
|
|
186 |
< |
private HashMap _hostsHB = new HashMap(); |
215 |
< |
private HashMap _hostsReg = new HashMap(); |
186 |
> |
private HashMap _hosts = new HashMap(); |
187 |
|
|
188 |
|
//---STATIC ATTRIBUTES--- |
189 |
|
|
191 |
|
|
192 |
|
private class HeartbeatHolder { |
193 |
|
|
194 |
< |
public void setLastHeartbeat(int lastHeartbeat) { |
194 |
> |
public HeartbeatHolder(HashMap registerHash) { |
195 |
> |
_registerHash = registerHash; |
196 |
> |
} |
197 |
> |
|
198 |
> |
public void setLastHeartbeat(long lastHeartbeat) { |
199 |
|
_lastHeartbeat = lastHeartbeat; |
200 |
|
} |
201 |
|
|
202 |
< |
public int getLastHeartbeat() { |
202 |
> |
public long getLastHeartbeat() { |
203 |
|
return _lastHeartbeat; |
204 |
|
} |
205 |
|
|
206 |
< |
private int _lastHeartbeat; |
206 |
> |
public HashMap getRegisterHash() { |
207 |
> |
return _registerHash; |
208 |
> |
} |
209 |
> |
|
210 |
> |
private long _lastHeartbeat; |
211 |
> |
private HashMap _registerHash; |
212 |
|
} |
213 |
|
|
214 |
|
} |