ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/host/ihost-perl/ihost.pl
Revision: 1.34
Committed: Thu Mar 22 08:53:59 2001 UTC (23 years, 7 months ago) by pjm2
Content type: text/plain
Branch: MAIN
CVS Tags: PROJECT_COMPLETION
Changes since 1.33: +8 -3 lines
Log Message:
If, during a heartbeat, the host receives an ERROR response from sending
the last_modified time of its configuration, the host is expected to
reconfigure itself.  If the configuration changes many times in a short
period of time, many heartbeats may be lost if they bail out as soon as
the ERROR response is received.  This has now been changed so that a
heartbeat is completed in its entirety before the host attempts to
reconfigure with the filter manager.

File Contents

# User Rev Content
1 pjm2 1.6 #!/usr/bin/perl -w
2 pjm2 1.1
3     # -----------------------------------------------------------
4     # Perl i-scream Host.
5 pjm2 1.12 # http://www.i-scream.org.uk
6 pjm2 1.1 #
7     # An all-in-one script to act as an i-scream host on
8     # a typical Unix/Linux box. You may adapt the data-gathering
9     # methods as you see fit.
10     # - pjm2@ukc.ac.uk
11     #
12 pjm2 1.28 # $Author: pjm2 $
13 pjm2 1.34 # $Id: ihost.pl,v 1.33 2001/03/20 12:58:22 pjm2 Exp $
14 pjm2 1.1 #------------------------------------------------------------
15    
16     $| = 1;
17    
18     use strict;
19     use IO::Socket;
20     use Sys::Hostname;
21    
22     use vars qw (
23     $filter_manager_addr
24     $filter_manager_port
25     $seq_no
26     $udp_update_time
27     $tcp_update_time
28     $last_udp_time
29     $last_tcp_time
30     $last_modified
31     $udp_port
32     $tcp_port
33     $filter_addr
34     $file_list
35 pjm2 1.17 $fqdn
36 tdb 1.25 $pidfile
37 pjm2 1.28 $retry_wait
38 pjm2 1.1 );
39    
40     if (@ARGV != 2) {
41     die "Usage: ihost.pl [i-scream filter manager] [TCP port]\n";
42     }
43    
44     $filter_manager_addr = $ARGV[0];
45     $filter_manager_port = $ARGV[1];
46    
47     $seq_no = 1;
48 pjm2 1.33 $retry_wait = 60;
49 pjm2 1.1
50 tdb 1.25 # write our PID to a file
51 tdb 1.26 $pidfile = "/var/tmp/ihost.pid";
52 tdb 1.25 &write_pid();
53    
54 pjm2 1.1 &tcp_configure();
55     &send_udp_packet();
56    
57     $last_udp_time = time;
58     $last_tcp_time = time;
59     while (1) {
60 pjm2 1.9 my($time) = time;
61     if ($time >= $last_udp_time + $udp_update_time) {
62 pjm2 1.1 &send_udp_packet();
63 pjm2 1.9 $last_udp_time = $time;
64 pjm2 1.1 }
65 pjm2 1.9 if ($time >= $last_tcp_time + $tcp_update_time) {
66 pjm2 1.1 &send_tcp_heartbeat();
67 pjm2 1.9 $last_tcp_time = $time;
68 pjm2 1.1 }
69 pjm2 1.9 my($next_udp) = $udp_update_time - $time + $last_udp_time;
70     my($next_tcp) = $tcp_update_time - $time + $last_tcp_time;
71     my($delay);
72     if ($next_udp < $next_tcp) {
73     $delay = $next_udp
74     }
75     else {
76     $delay = $next_tcp;
77     }
78     `sleep $delay`;
79 pjm2 1.1 }
80    
81 tdb 1.25 # we'll probably never get here...
82     `rm -f $pidfile`;
83 pjm2 1.1 exit(0);
84    
85 pjm2 1.12
86     #-----------------------------------------------------------------------
87 pjm2 1.28 # wait_then_retry
88     # Waits for the period of time specified in $retry_wait, then attempts
89     # to reconfigure with the server.
90     #-----------------------------------------------------------------------
91     sub wait_then_retry() {
92     print "Will retry configuration with filter manager in $retry_wait seconds.\n";
93     `sleep $retry_wait`;
94     }
95    
96    
97     #-----------------------------------------------------------------------
98 pjm2 1.12 # tcp_configure
99     # Establishes a TCP connection to the specified i-scream filter manager.
100     # The host then requests details from the server, such as the intervals
101     # at which to send UDP packets.
102     #-----------------------------------------------------------------------
103 pjm2 1.1 sub tcp_configure() {
104    
105 pjm2 1.31 while (1) {
106 pjm2 1.28 my($sock) = new IO::Socket::INET(
107     PeerAddr => $filter_manager_addr,
108     PeerPort => $filter_manager_port,
109     Proto => 'tcp'
110     ) or die "Cannot connect!";
111     if (!defined $sock) {
112     print "IHOST ERROR: Could not connect to $filter_manager_addr:$filter_manager_port.\n";
113     print "Please check that there is an i-scream server at this address.\n";
114     wait_then_retry();
115     next;
116     }
117    
118     # Now run through the configuration process...
119     my($response);
120    
121     print $sock "STARTCONFIG\n";
122     $response = <$sock>;
123 pjm2 1.32 if ($response && !($response eq "OK\n")) {
124     print "The i-scream server rejected the STARTCONFIG command.\n";
125 pjm2 1.28 close($sock);
126     wait_then_retry();
127     next;
128     }
129    
130     print "Config started okay.\n";
131    
132     print $sock "LASTMODIFIED\n";
133     $response = <$sock>;
134 pjm2 1.32 if (!$response) {
135     print "The i-scream server did not return anything for the LASTMODIFIED command.\n";
136     close($sock);
137     wait_then_retry();
138     next;
139     }
140 pjm2 1.28 chop $response;
141     $last_modified = $response;
142    
143     print "Config last modified: ". (scalar localtime $last_modified/1000) . "\n";
144    
145     print $sock "FILELIST\n";
146     $response = <$sock>;
147 pjm2 1.32 if (!$response) {
148     print "The i-scream server did not provide a configuration file list.\n";
149     close($sock);
150     wait_then_retry();
151     next;
152     }
153 pjm2 1.28 chop $response;
154     $file_list = $response;
155    
156     print "File list obtained: $file_list\n";
157    
158     print $sock "FQDN\n";
159     $response = <$sock>;
160 pjm2 1.32 if (!$response) {
161     print "The i-scream server did not tell us our FQDN.\n";
162     close($sock);
163     wait_then_retry();
164     next;
165     }
166 pjm2 1.28 chop $response;
167     $fqdn = $response;
168    
169     print "FQDN returned: $fqdn\n";
170    
171     print $sock "UDPUpdateTime\n";
172     $response = <$sock>;
173 pjm2 1.32 if (!$response) {
174     print "The i-scream server did not give us a UDPUpdateTime.\n";
175     close($sock);
176     wait_then_retry();
177     next;
178     }
179 pjm2 1.28 chop $response;
180     $udp_update_time = $response;
181    
182     print $sock "TCPUpdateTime\n";
183     $response = <$sock>;
184 pjm2 1.32 if (!$response) {
185     print "The i-scream server did not give us a TCPUpdateTime.\n";
186     close($sock);
187     wait_then_retry();
188     next;
189     }
190 pjm2 1.28 chop $response;
191     $tcp_update_time = $response;
192    
193     print "UDP packet period: $udp_update_time seconds.\nTCP heartbeat period: $tcp_update_time seconds.\n";
194    
195     print $sock "ENDCONFIG\n";
196     $response = <$sock>;
197 pjm2 1.32 if ($response && !($response eq "OK\n")) {
198 pjm2 1.28 print "ENDCONFIG command to server failed. Terminated.\n";
199     close($sock);
200     wait_then_retry();
201     next;
202     }
203    
204     print "Config ended.\n";
205    
206     print $sock "FILTER\n";
207     $response = <$sock>;
208 pjm2 1.32 if (!$response) {
209 pjm2 1.28 print "Failed: Could not get a filter address from the filter manager.\n";
210     close($sock);
211     wait_then_retry();
212     next;
213     }
214     chop $response;
215 pjm2 1.32 $response =~ /^(.*);(.*);(.*)/;
216 pjm2 1.28 if ($response eq "ERROR") {
217     print "There are no active configured filters for your host.\n";
218     close($sock);
219     wait_then_retry();
220     next;
221     }
222     ($filter_addr, $udp_port, $tcp_port) = ($1, $2, $3);
223     unless (defined($filter_addr) && defined($udp_port) && defined($tcp_port)) {
224     print "Failed: Filter address response from server did not make sense: $response\n";
225     close($sock);
226     wait_then_retry();
227     next;
228     }
229    
230     print "Got filter data ($filter_addr, $udp_port, $tcp_port)\n";
231    
232     print $sock "END\n";
233     $response = <$sock>;
234 pjm2 1.32 if ($response && ($response eq "OK\n")) {
235 pjm2 1.28 print "Host successfully configured via TCP.\n"
236     }
237     else {
238 pjm2 1.32 print "The server failed the host configuration on the END command.\n";
239 pjm2 1.28 close($sock);
240     wait_then_retry();
241     next;
242     }
243 pjm2 1.1
244 pjm2 1.28 close($sock);
245 pjm2 1.1
246 pjm2 1.28 print "Configuration finished sucessfully!\n";
247     last;
248 pjm2 1.1 }
249 pjm2 1.12 return;
250 pjm2 1.1 }
251    
252 pjm2 1.12
253    
254    
255     #-----------------------------------------------------------------------
256     # send_udp_packet
257     # Sends a UDP packet to an i-scream filter.
258     # The packet contains XML markup describing some of the machine's state.
259     # Receipt of UDP packets is not guaranteed.
260     #-----------------------------------------------------------------------
261 pjm2 1.1 sub send_udp_packet() {
262    
263     my(@statgrab) = `./statgrab.pl`;
264     my(%packet);
265 pjm2 1.13 for (my($i) = 0; $i <= $#statgrab; $i++) {
266 pjm2 1.1 $statgrab[$i] =~ /^([^\s]*) (.*)$/;
267     $packet{$1} = $2;
268     }
269    
270     my($date) = time;
271    
272     my($disk_info) = "<disk>";
273     my($i) = 0;
274     while (defined $packet{"packet.disk.p$i.attributes.mount"}) {
275 pjm2 1.15 $disk_info .= "<p$i";
276     $disk_info .= " name=\"" . $packet{"packet.disk.p$i.attributes.name"} . "\"";
277     $disk_info .= " kbytes=\"" . $packet{"packet.disk.p$i.attributes.kbytes"} . "\"";
278     $disk_info .= " used=\"" . $packet{"packet.disk.p$i.attributes.used"} . "\"";
279     $disk_info .= " avail=\"" . $packet{"packet.disk.p$i.attributes.avail"} . "\"";
280     $disk_info .= " mount=\"" . $packet{"packet.disk.p$i.attributes.mount"} . "\"";
281     $disk_info .= "></p$i>";
282 pjm2 1.1 ++$i;
283     }
284     $disk_info .= "</disk>";
285 pjm2 1.13
286 pjm2 1.33 my($ip);
287     $ip = inet_ntoa(scalar(gethostbyname(hostname())) || 'localhost') or $ip = 'localhost';
288 pjm2 1.1
289     # Build the XML packet this way, as we can clearly
290     # see the structure and contents... I like this ;-)
291 pjm2 1.12 # [Note that the server rejects UDP packets that are
292     # larger than 8196 bytes]
293 pjm2 1.1 my($xml) = <<EOF;
294    
295 tdb 1.19 <packet seq_no="$seq_no" machine_name="$fqdn" date="$date" type="data" ip="$ip">
296 pjm2 1.1 <load>
297     <load1>$packet{"packet.load.load1"}</load1>
298     <load5>$packet{"packet.load.load5"}</load5>
299     <load15>$packet{"packet.load.load15"}</load15>
300     </load>
301     <os>
302     <name>$packet{"packet.os.name"}</name>
303     <release>$packet{"packet.os.release"}</release>
304     <platform>$packet{"packet.os.platform"}</platform>
305     <sysname>$packet{"packet.os.sysname"}</sysname>
306     <version>$packet{"packet.os.version"}</version>
307 tdb 1.10 <uptime>$packet{"packet.os.uptime"}</uptime>
308 pjm2 1.1 </os>
309     <users>
310     <count>$packet{"packet.users.count"}</count>
311     <list>$packet{"packet.users.list"}</list>
312     </users>
313     <processes>
314     <total>$packet{"packet.processes.total"}</total>
315     <sleeping>$packet{"packet.processes.sleeping"}</sleeping>
316     <zombie>$packet{"packet.processes.zombie"}</zombie>
317     <stopped>$packet{"packet.processes.stopped"}</stopped>
318     <cpu>$packet{"packet.processes.cpu"}</cpu>
319     </processes>
320     <cpu>
321     <idle>$packet{"packet.cpu.idle"}</idle>
322     <user>$packet{"packet.cpu.user"}</user>
323     <kernel>$packet{"packet.cpu.kernel"}</kernel>
324     <iowait>$packet{"packet.cpu.iowait"}</iowait>
325     <swap>$packet{"packet.cpu.swap"}</swap>
326     </cpu>
327     <memory>
328 pjm2 1.13 <total>$packet{"packet.memory.total"}</total>
329 pjm2 1.1 <free>$packet{"packet.memory.free"}</free>
330     </memory>
331     <swap>
332 pjm2 1.13 <total>$packet{"packet.swap.total"}</total>
333     <free>$packet{"packet.swap.free"}</free>
334 pjm2 1.1 </swap>
335     $disk_info
336     </packet>
337    
338     EOF
339    
340 pjm2 1.12 # Make the packet smaller by stripping out newlines and leading spaces.
341 pjm2 1.1 $xml =~ s/\n\s*//g;
342 tdb 1.19
343 pjm2 1.1 my($sock) = new IO::Socket::INET (
344     PeerPort => $udp_port,
345     PeerAddr => $filter_addr,
346     Proto => 'udp'
347 pjm2 1.21 ) or die "Could not send UDP: $!\n";
348 pjm2 1.1
349     print $sock $xml or die "Could not send UDP packet: $!\n";
350     close($sock);
351     $seq_no++;
352     print "-";
353 pjm2 1.12
354     return;
355 pjm2 1.1 }
356    
357 pjm2 1.12
358    
359    
360     #-----------------------------------------------------------------------
361     # send_tcp_heartbeat
362     # Establishes a TCP connection to an i-scream filter.
363     # The heartbeat is used as a guaranteed "I'm alive" delivery mechanism.
364 pjm2 1.34 # If we need to reconfigure, then we complete the heartbeat before
365     # doing so.
366 pjm2 1.12 #-----------------------------------------------------------------------
367 pjm2 1.1 sub send_tcp_heartbeat() {
368    
369 pjm2 1.34 my ($doReconfigure) = 0;
370    
371 pjm2 1.1 my($sock) = new IO::Socket::INET(
372     PeerAddr => $filter_addr,
373     PeerPort => $tcp_port,
374     Proto => 'tcp'
375 pjm2 1.21 ) or return;
376 pjm2 1.11 if (!defined $sock) {
377     print "IHOST WARNING: Failed to deliver a heartbeat to the i-scream filter.\n";
378 pjm2 1.27 &tcp_configure();
379 pjm2 1.11 return;
380     }
381 pjm2 1.1
382     # Now run through the configuration process.
383     my($response);
384    
385     print $sock "HEARTBEAT\n";
386     $response = <$sock>;
387 pjm2 1.32 if (!$response eq "OK\n") {
388 pjm2 1.1 close($sock);
389     print "Server gave wrong response to HEARTBEAT: $response\n";
390 pjm2 1.27 &tcp_configure();
391 pjm2 1.1 return;
392     }
393    
394     print $sock "CONFIG\n";
395     $response = <$sock>;
396 pjm2 1.32 if (!$response eq "OK\n") {
397 pjm2 1.1 close($sock);
398     print "Server gave wrong response to CONFIG: $response\n";
399 pjm2 1.27 &tcp_configure();
400 pjm2 1.1 return;
401     }
402    
403     print $sock "$file_list\n";
404     $response = <$sock>;
405 pjm2 1.32 if (!$response eq "OK\n") {
406 pjm2 1.1 close($sock);
407     print "Server gave wrong response to file list: $response\n";
408 pjm2 1.27 &tcp_configure();
409 pjm2 1.1 return;
410     }
411    
412     print $sock "$last_modified\n";
413     $response = <$sock>;
414 pjm2 1.32 if ($response eq "ERROR\n") {
415 pjm2 1.1 close($sock);
416 pjm2 1.27 print "Server configuration changed. Reconfiguring with filter manager.\n";
417 pjm2 1.34 $doReconfigure = 1;
418 pjm2 1.1 }
419 pjm2 1.32 if (!$response eq "OK\n") {
420 pjm2 1.1 close($sock);
421     print "Server gave wrong response to HEARTBEAT: $response\n";
422 pjm2 1.27 &tcp_configure();
423 pjm2 1.1 return;
424     }
425    
426     print $sock "ENDHEARTBEAT\n";
427 pjm2 1.23 $response = <$sock>;
428 pjm2 1.32 if (!$response eq "OK\n") {
429 pjm2 1.1 close($sock);
430     print "Server gave wrong response to ENDHEARTBEAT: $response\n";
431 pjm2 1.27 &tcp_configure();
432 pjm2 1.1 return;
433     }
434    
435     close($sock);
436     print "^";
437 pjm2 1.34
438     &tcp_configure() if $doReconfigure;
439 tdb 1.25
440     return;
441     }
442    
443     #-----------------------------------------------------------------------
444     # write_pid
445     # Writes the PID (process ID) of this instance to $pidfile.
446     # This is then used by a seperate script to check (and restart) ihost.
447     #-----------------------------------------------------------------------
448     sub write_pid() {
449     open PID, ">$pidfile";
450     print PID $$;
451     close PID;
452 pjm2 1.12
453     return;
454 pjm2 1.1 }