ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.11
Committed: Wed Jun 15 15:39:25 2005 UTC (19 years, 6 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.10: +4 -4 lines
Log Message:
Get the brackets the right way round. Skel spotted his own problem :)
I should probably have checked a bit more closely ;)

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 #
4     # i-scream central monitoring system
5 tdb 1.8 # http://www.i-scream.org
6 tdb 1.2 # Copyright (C) 2000-2002 i-scream
7     #
8     # This program is free software; you can redistribute it and/or
9     # modify it under the terms of the GNU General Public License
10     # as published by the Free Software Foundation; either version 2
11     # of the License, or (at your option) any later version.
12     #
13     # This program is distributed in the hope that it will be useful,
14     # but WITHOUT ANY WARRANTY; without even the implied warranty of
15     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     # GNU General Public License for more details.
17     #
18     # You should have received a copy of the GNU General Public License
19     # along with this program; if not, write to the Free Software
20     # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21     #
22    
23 tdb 1.1 # -----------------------------------------------------------
24     # i-scream graph generation scripts
25 tdb 1.8 # http://www.i-scream.org
26 tdb 1.1 #
27     # Generates rrd databases for i-scream data by connecting to
28     # the i-scream server and collecting data.
29     #
30     # $Author: tdb $
31 tdb 1.11 # $Id: watch.pl,v 1.10 2005/06/15 10:41:25 tdb Exp $
32 tdb 1.1 #------------------------------------------------------------
33    
34 tdb 1.11 my($version) = '$Id: watch.pl,v 1.10 2005/06/15 10:41:25 tdb Exp $';
35 tdb 1.1
36     $| = 1;
37    
38     use strict;
39 tdb 1.3 use Getopt::Std;
40     use IO::Socket;
41 tdb 1.1 use iscream::XMLParser;
42     use RRDs;
43    
44 tdb 1.3 # define variables that will be read from the config
45     # nb. keep this insync with the config file!
46 tdb 1.9 use vars qw{
47     $imgdir $rrddir
48 tdb 1.3 $maxrrdage $maximgage $deleterrds $deleteimgs
49 tdb 1.10 $hex_slash $hex_underscore $hex_space $hex_colon $hex_bslash $hex_rbracket
50     $hex_lbracket $hex_plus
51 tdb 1.3 $rrdstep $retry_wait
52     $verbose $quiet
53     };
54    
55     # default locate of the config file
56     my($configfile) = "rrdgraphing.conf";
57    
58     # check for command line arguments
59     my(%opts);
60     my($ret) = getopts('hvqVc:', \%opts);
61    
62     # if invalid argument given, $ret will not be 1
63     &usage() if $ret != 1;
64    
65     # first process the arguments which might mean we exit now
66    
67     # -h is usage
68     if($opts{h}) {
69     &usage();
70     }
71     # -V is version
72     if($opts{V}) {
73     print "watch.pl version: $version\n";
74     exit(1);
75     }
76 tdb 1.1
77 tdb 1.3 # Then try getting the config
78 tdb 1.1
79 tdb 1.3 # -c specifies the config file location
80     if($opts{c}) {
81     $configfile = $opts{c};
82     }
83     # suck in the config
84     &log("reading config from $configfile\n");
85     do $configfile;
86    
87     # Then any options we might want to override the config with
88    
89     # -v is verbose
90     if($opts{v}) {
91     $verbose = $opts{v};
92     }
93     # -q is verbose
94     if($opts{q}) {
95     $quiet = $opts{q};
96     # if we're meant to be quiet, we can hardly be verbose!
97     # $verbose = 0;
98     }
99 tdb 1.1
100 tdb 1.3 # Finally check for required arguments
101    
102     # check we still have two arguments left
103 tdb 1.1 if (@ARGV != 2) {
104 tdb 1.3 &usage();
105 tdb 1.1 }
106    
107     # user supplied client interface server and port
108     my($addr) = $ARGV[0];
109     my($cport) = $ARGV[1];
110    
111 tdb 1.3
112     # Main program loop
113 tdb 1.1 while(1) {
114    
115 tdb 1.3 &log("Connecting control channel to port $cport on $addr...\n");
116 tdb 1.1
117     # attempt to connect the control channel
118     my($csock) = new IO::Socket::INET(
119     PeerAddr => $addr,
120     PeerPort => $cport,
121     Proto => 'tcp'
122     );
123    
124     # if socket isn't defined connection failed
125     if (!defined $csock) {
126 tdb 1.3 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
127     &error("Please check that there is an i-scream server at this address.\n");
128 tdb 1.1 &wait_then_retry();
129     next;
130     }
131    
132     my($response);
133    
134     # client interface should send it's protocol ID
135     # we know about "PROTOCOL 1.1", and will only accept the same
136     $response = <$csock>;
137 tdb 1.3 &log("CLI sent: $response");
138 tdb 1.1 if ($response && $response ne "PROTOCOL 1.1\n") {
139 tdb 1.3 &error("The i-scream server sent an unexpected protocol ID: $response\n");
140 tdb 1.1 close($csock);
141     &wait_then_retry();
142     next;
143     }
144    
145     # send our identifier to the client interface
146     print $csock "rrdgraphing\n";
147 tdb 1.3 &log("we sent: rrdgraphing\n");
148 tdb 1.1 $response = <$csock>;
149 tdb 1.3 &log("CLI sent: $response");
150 tdb 1.1 if ($response && $response ne "OK\n") {
151 tdb 1.3 &error("Received unexpected response: $response\n");
152 tdb 1.1 close($csock);
153     &wait_then_retry();
154     next;
155     }
156    
157     # tell the client interface we'd like to start the data channel
158     print $csock "STARTDATA\n";
159 tdb 1.3 &log("we sent: STARTDATA\n");
160    
161 tdb 1.1 # the response should be the socket to connect the data channel to
162     $response = <$csock>;
163 tdb 1.3 &log("CLI sent: $response");
164 tdb 1.1 chomp $response;
165    
166     my($dport) = $response;
167 tdb 1.3 &log("Connecting data channel to port $dport on $addr...\n");
168 tdb 1.1
169     # attempt to connect the data channel
170     my($dsock) = new IO::Socket::INET(
171     PeerAddr => $addr,
172     PeerPort => $dport,
173     Proto => 'tcp'
174     ) or die "arse?";
175    
176     # if socket isn't defined connection failed
177     if (!defined $dsock) {
178 tdb 1.3 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
179     &error("Failure in communications.\n");
180 tdb 1.1 close($csock);
181     &wait_then_retry();
182     next;
183     }
184    
185     # the data channel should now be sending us data!
186    
187     # call sub to process data being received over the data channel
188     &processdata($dsock);
189    
190     # data processing has stopped, close sockets
191     close($csock);
192     close($dsock);
193    
194     # wait before retrying
195     &wait_then_retry();
196     }
197    
198     # we'll never reach here... unless 1 becomes false for some reason ;)
199     exit 0;
200    
201    
202     #
203     # wait for a while before retrying
204     #
205     sub wait_then_retry() {
206 tdb 1.3 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
207 tdb 1.1 sleep $retry_wait;
208     }
209    
210     #
211     # Given the socket of the data channel will process all
212     # the incoming XML data, creating and updating the appropriate
213     # database files.
214     #
215     # $dsock = socket connected to the data channel
216     #
217     sub processdata() {
218     # the socket connected to the data channel
219     my($dsock) = @_;
220     # save us recreating this variable each time we loop
221     my($xml);
222    
223     while(1) {
224     # read data
225     $xml = <$dsock>;
226    
227     # something odd has happened
228     last if not defined $xml;
229    
230     # attempt to parse the data
231     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
232     if($err) {
233 tdb 1.3 &error("Skipped, XML did not parse: $xml");
234 tdb 1.1 next;
235     }
236    
237     # standard data packet
238     if($xmlhash{"packet.attributes.type"} eq "data") {
239     my($machine) = $xmlhash{"packet.attributes.machine_name"};
240     my($date) = $xmlhash{"packet.attributes.date"};
241    
242     # make directory for machine
243     if(! -d "$rrddir/$machine") {
244     # not sure on this umask, but it seems to work?
245     mkdir "$rrddir/$machine", 0777;
246 tdb 1.3 &log("created directory $rrddir/$machine\n");
247 tdb 1.1 }
248    
249     my(@data);
250    
251     # cpu
252     @data = ( "packet.cpu.idle:idle:GAUGE",
253     "packet.cpu.user:user:GAUGE",
254     "packet.cpu.kernel:kernel:GAUGE",
255     "packet.cpu.swap:swap:GAUGE",
256     "packet.cpu.iowait:iowait:GAUGE",
257     );
258     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
259    
260 tdb 1.9 # uptime
261     @data = ( "packet.os.uptime:uptime:GAUGE" );
262     &updaterrd($machine, "uptime", $date, $rrdstep, \%xmlhash, @data);
263    
264 tdb 1.1 # mem
265     @data = ( "packet.memory.free:free:GAUGE",
266     "packet.memory.total:total:GAUGE",
267 tdb 1.7 "packet.memory.cache:cache:GAUGE",
268 tdb 1.1 );
269     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
270    
271     # load
272     @data = ( "packet.load.load1:load1:GAUGE",
273     "packet.load.load5:load5:GAUGE",
274     "packet.load.load15:load15:GAUGE",
275     );
276     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
277    
278     # processes
279     @data = ( "packet.processes.cpu:cpu:GAUGE",
280     "packet.processes.sleeping:sleeping:GAUGE",
281     "packet.processes.stopped:stopped:GAUGE",
282     "packet.processes.total:total:GAUGE",
283     "packet.processes.zombie:zombie:GAUGE",
284     );
285     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
286    
287     # swap
288     @data = ( "packet.swap.free:free:GAUGE",
289     "packet.swap.total:total:GAUGE",
290     );
291     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
292    
293     # users
294     @data = ( "packet.users.count:count:GAUGE",
295     );
296     &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
297 tdb 1.5
298     # paging
299 tdb 1.6 @data = ( "packet.pages.pageins:pageins:GAUGE",
300     "packet.pages.pageouts:pageouts:GAUGE",
301 tdb 1.5 );
302     &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
303    
304 tdb 1.1 # disk
305     my($i) = 0;
306     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
307     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
308     $mount =~ s/_/$hex_underscore/g;
309     $mount =~ s/\//$hex_slash/g;
310 tdb 1.9 $mount =~ s/\\/$hex_bslash/g;
311     $mount =~ s/ /$hex_space/g;
312     $mount =~ s/:/$hex_colon/g;
313 tdb 1.6 @data = ( "packet.disk.p$i.attributes.total:total:GAUGE",
314 tdb 1.1 "packet.disk.p$i.attributes.used:used:GAUGE",
315 tdb 1.5 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
316     "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
317 tdb 1.1 );
318     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
319     ++$i;
320 tdb 1.6 }
321    
322     # diskio
323     $i = 0;
324     while(defined $xmlhash{"packet.diskio.p$i.attributes.name"}) {
325     my($name) = $xmlhash{"packet.diskio.p$i.attributes.name"};
326 tdb 1.9 $name =~ s/_/$hex_underscore/g;
327     $name =~ s/\//$hex_slash/g;
328     $name =~ s/\\/$hex_bslash/g;
329     $name =~ s/ /$hex_space/g;
330     $name =~ s/:/$hex_colon/g;
331 tdb 1.6 @data = ( "packet.diskio.p$i.attributes.rbytes:rbytes:GAUGE",
332     "packet.diskio.p$i.attributes.wbytes:wbytes:GAUGE",
333     );
334     &updaterrd($machine, "diskio-$name", $date, $rrdstep, \%xmlhash, @data);
335     ++$i
336     }
337    
338     # net
339     $i = 0;
340     while(defined $xmlhash{"packet.net.p$i.attributes.name"}) {
341     my($name) = $xmlhash{"packet.net.p$i.attributes.name"};
342 tdb 1.9 $name =~ s/_/$hex_underscore/g;
343     $name =~ s/\//$hex_slash/g;
344     $name =~ s/\\/$hex_bslash/g;
345     $name =~ s/ /$hex_space/g;
346     $name =~ s/:/$hex_colon/g;
347 tdb 1.11 $name =~ s/\(/$hex_lbracket/g;
348     $name =~ s/\)/$hex_rbracket/g;
349 tdb 1.10 $name =~ s/\+/$hex_plus/g;
350 tdb 1.6 @data = ( "packet.net.p$i.attributes.rx:rx:GAUGE",
351     "packet.net.p$i.attributes.tx:tx:GAUGE",
352     );
353 tdb 1.7 &updaterrd($machine, "net-$name", $date, $rrdstep, \%xmlhash, @data);
354     ++$i
355     }
356    
357     # mailq
358     $i = 0;
359     while(defined $xmlhash{"packet.mailq.p$i.attributes.name"}) {
360     my($name) = $xmlhash{"packet.mailq.p$i.attributes.name"};
361 tdb 1.9 $name =~ s/\s+//g;
362 tdb 1.7 @data = ( "packet.mailq.p$i.attributes.size:size:GAUGE",
363     );
364     &updaterrd($machine, "mailq-$name", $date, $rrdstep, \%xmlhash, @data);
365 tdb 1.6 ++$i
366 tdb 1.1 }
367     }
368    
369     # queue statistics packet
370     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
371     # psuedo machine for internal server stuff
372     my($machine) = "i-scream-server";
373     # make directory
374     if(! -d "$rrddir/$machine") {
375     # not sure on this umask, but it seems to work?
376     mkdir "$rrddir/$machine", 0777;
377 tdb 1.3 &log("created directory $rrddir/$machine\n");
378 tdb 1.1 }
379     my($hash) = $xmlhash{"packet.attributes.hashCode"};
380     my($date) = $xmlhash{"packet.attributes.date"};
381     my($name) = $xmlhash{"packet.attributes.name"};
382     # take a look to see if we have a shutdown packet...
383     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
384     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
385 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
386 tdb 1.1 next;
387     }
388     # look through to see how many internal queues we have
389     my($i) = 0;
390     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
391     # see if the queue has been removed
392     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
393     # delete the queues rrd
394     unlink "$rrddir/$machine/$hash\_$i.rrd";
395 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
396 tdb 1.1 # are there any other rrd's left on this queue? if not, cleanup.
397     # get a list of any that may be still there..
398     opendir(DIR, "$rrddir/$machine");
399     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
400     closedir DIR;
401     # count them (+1 because an empty array is size -1)
402     my($rrdcount) = $#rrdcountfiles + 1;
403     if($rrdcount == 0) {
404     # clean up the def file and any images
405     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
406 tdb 1.3 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
407 tdb 1.1 }
408     ++$i;
409     next;
410     }
411     # the &updaterrd will also do this check, but we want
412     # to write our def file out first
413     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
414     open(DEF, ">$rrddir/$machine/$hash.def");
415     print DEF $name;
416     close DEF;
417 tdb 1.3 &log("created $rrddir/$machine/$hash.def\n");
418 tdb 1.1 }
419     my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
420     "packet.queue.attributes.total:total:COUNTER",
421     );
422     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
423     ++$i;
424     }
425     }
426     else {
427 tdb 1.3 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
428 tdb 1.1 }
429     }
430    
431     # we'll now return from this sub and reconnect
432 tdb 1.3 &error("Data channel socket gave no data, bailing out...\n");
433 tdb 1.1 }
434    
435     #
436     # sub to update an rrd file
437     #
438     # $machine = name of the machine
439     # (eg. kernow.ukc.ac.uk)
440     # $type = the type of graph for the machine
441     # (eg. cpu)
442     # $date = the date of the item we want to add
443     # (in seconds since the epoch)
444     # $step = the interval at which the database steps
445     # used when we create a new rrd
446     # $xmlref = reference to the xml data packet
447     # @data = array containing data items to add
448     # (eg. "packet.cpu.user:user:GAUGE")
449     #
450     sub updaterrd() {
451     my($machine, $type, $date, $step, $xmlref, @data) = @_;
452     # get hold of the xmlhash we have a reference to
453     my(%xmlhash) = %$xmlref;
454     # check if we need to create a new rrd
455     if( ! -f "$rrddir/$machine/$type.rrd") {
456     my(@createdata);
457     # pull the details out of the data we've been given
458     foreach my $dataitem (@data) {
459     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
460     push @createdata, "$1:$2";
461     }
462     }
463     # call the &makerrd to actually create the rrd
464 tdb 1.3 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
465 tdb 1.1 &makerrd($machine, $type, $date, $step, @createdata);
466     }
467     # get the details out of the data we've been given
468     my($updateparams) = "$date";
469     foreach my $dataitem (@data) {
470     if($dataitem =~ /^(\S+):\S+:\S+$/) {
471     # pull the values straight out of the xmlhash
472     my($value) = $xmlhash{$1};
473     # if it's undefined we'll set it to 0
474     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
475     $value = "0" if not defined $value;
476     $updateparams .= ":$value";
477     }
478     }
479     # perform the update
480     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
481 tdb 1.3 &log("updating $rrddir/$machine/$type.rrd\n");
482 tdb 1.1 my($err) = RRDs::error;
483 tdb 1.3 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
484 tdb 1.1 }
485    
486     #
487     # sub to create a new rrd file
488     #
489     # $machine = name of the machine
490     # (eg. kernow.ukc.ac.uk)
491     # $type = the type of graph for the machine
492     # (eg. cpu)
493     # $start = the date of the first item we want to add
494     # (in seconds since the epoch)
495     # $step = the interval at which the database steps
496     # @data = the data items we want to put in the rrd
497     # in the form: $dsname:dstype
498     # (eg. "size:GAUGE")
499     #
500     sub makerrd() {
501     my($machine, $type, $start, $step, @data) = @_;
502     # check if directory exists for rrd
503     if(! -d "$rrddir/$machine") {
504     # not sure on this umask, but it seems to work?
505     mkdir "$rrddir/$machine", 0777;
506 tdb 1.3 &log("created directory $rrddir/$machine\n");
507 tdb 1.1 }
508     my(@rrdcmd);
509     # we'll want to add our first data item at $start,
510     # so we start our rrd $step before that.
511     $start -= $step;
512     push @rrdcmd, "$rrddir/$machine/$type.rrd";
513     push @rrdcmd, "--start=$start";
514     push @rrdcmd, "--step=$step";
515     foreach my $dataitem (@data) {
516     # dataitem should be: "dsname:dstype"
517     if($dataitem =~ /^(\S+):(\S+)$/) {
518     push @rrdcmd, "DS:$1:$2:600:U:U";
519     }
520     }
521     push @rrdcmd, (
522     # 3h in 15s samples
523     "RRA:AVERAGE:0.5:1:720",
524     "RRA:MAX:0.5:1:720",
525     # 1d in 2m samples
526     "RRA:AVERAGE:0.5:8:720",
527     "RRA:MAX:0.5:8:720",
528     # 1w in 15m samples
529     "RRA:AVERAGE:0.5:60:672",
530     "RRA:MAX:0.5:60:672",
531     # 1m in 1hr samples
532     "RRA:AVERAGE:0.5:240:744",
533     "RRA:MAX:0.5:240:744",
534     # 1y in 12hr samples
535     "RRA:AVERAGE:0.5:2880:730",
536     "RRA:MAX:0.5:2880:730",
537     );
538     RRDs::create (@rrdcmd);
539     my($err) = RRDs::error;
540 tdb 1.3 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
541     }
542    
543     # prints out usage information then exits
544     sub usage() {
545     print "Usage: watch.pl [options] i-scream_client_interface port\n";
546     print "Options\n";
547     print " -c config Specifies the configuration file\n";
548     print " default: rrdgraphing.conf\n";
549     print " -v Be verbose about what's happening\n";
550     print " -q Be quiet, even supress errors\n";
551     print " -V Print version number\n";
552     print " -h Prints this help page\n";
553     exit(1);
554     }
555    
556     # prints a log message if verbose is turned on
557     sub log() {
558     my($msg) = @_;
559     print $msg if $verbose;
560     }
561    
562     # prints an error message unless quiet is turned on
563     sub error() {
564     my($msg) = @_;
565     print STDERR $msg unless $quiet;
566 tdb 1.1 }