ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.13
Committed: Thu Nov 23 16:37:44 2006 UTC (17 years, 5 months ago) by tdb
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 1.12: +3 -3 lines
Log Message:
Make graph sizes configurable. Needed for rrdtool 1.2.x when text is
anti-aliased and no longer fits.

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 #
4     # i-scream central monitoring system
5 tdb 1.8 # http://www.i-scream.org
6 tdb 1.2 # Copyright (C) 2000-2002 i-scream
7     #
8     # This program is free software; you can redistribute it and/or
9     # modify it under the terms of the GNU General Public License
10     # as published by the Free Software Foundation; either version 2
11     # of the License, or (at your option) any later version.
12     #
13     # This program is distributed in the hope that it will be useful,
14     # but WITHOUT ANY WARRANTY; without even the implied warranty of
15     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     # GNU General Public License for more details.
17     #
18     # You should have received a copy of the GNU General Public License
19     # along with this program; if not, write to the Free Software
20     # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21     #
22    
23 tdb 1.1 # -----------------------------------------------------------
24     # i-scream graph generation scripts
25 tdb 1.8 # http://www.i-scream.org
26 tdb 1.1 #
27     # Generates rrd databases for i-scream data by connecting to
28     # the i-scream server and collecting data.
29     #
30     # $Author: tdb $
31 tdb 1.13 # $Id: watch.pl,v 1.12 2005/06/29 10:26:40 tdb Exp $
32 tdb 1.1 #------------------------------------------------------------
33    
34 tdb 1.13 my($version) = '$Id: watch.pl,v 1.12 2005/06/29 10:26:40 tdb Exp $';
35 tdb 1.1
36     $| = 1;
37    
38     use strict;
39 tdb 1.3 use Getopt::Std;
40     use IO::Socket;
41 tdb 1.1 use iscream::XMLParser;
42     use RRDs;
43    
44 tdb 1.3 # define variables that will be read from the config
45     # nb. keep this insync with the config file!
46 tdb 1.9 use vars qw{
47 tdb 1.13 $imgdir $imgwidth $imgheight $rrddir
48 tdb 1.3 $maxrrdage $maximgage $deleterrds $deleteimgs
49 tdb 1.10 $hex_slash $hex_underscore $hex_space $hex_colon $hex_bslash $hex_rbracket
50 tdb 1.12 $hex_lbracket $hex_plus $hex_hash
51 tdb 1.3 $rrdstep $retry_wait
52     $verbose $quiet
53     };
54    
55     # default locate of the config file
56     my($configfile) = "rrdgraphing.conf";
57    
58     # check for command line arguments
59     my(%opts);
60     my($ret) = getopts('hvqVc:', \%opts);
61    
62     # if invalid argument given, $ret will not be 1
63     &usage() if $ret != 1;
64    
65     # first process the arguments which might mean we exit now
66    
67     # -h is usage
68     if($opts{h}) {
69     &usage();
70     }
71     # -V is version
72     if($opts{V}) {
73     print "watch.pl version: $version\n";
74     exit(1);
75     }
76 tdb 1.1
77 tdb 1.3 # Then try getting the config
78 tdb 1.1
79 tdb 1.3 # -c specifies the config file location
80     if($opts{c}) {
81     $configfile = $opts{c};
82     }
83     # suck in the config
84     &log("reading config from $configfile\n");
85     do $configfile;
86    
87     # Then any options we might want to override the config with
88    
89     # -v is verbose
90     if($opts{v}) {
91     $verbose = $opts{v};
92     }
93     # -q is verbose
94     if($opts{q}) {
95     $quiet = $opts{q};
96     # if we're meant to be quiet, we can hardly be verbose!
97     # $verbose = 0;
98     }
99 tdb 1.1
100 tdb 1.3 # Finally check for required arguments
101    
102     # check we still have two arguments left
103 tdb 1.1 if (@ARGV != 2) {
104 tdb 1.3 &usage();
105 tdb 1.1 }
106    
107     # user supplied client interface server and port
108     my($addr) = $ARGV[0];
109     my($cport) = $ARGV[1];
110    
111 tdb 1.3
112     # Main program loop
113 tdb 1.1 while(1) {
114    
115 tdb 1.3 &log("Connecting control channel to port $cport on $addr...\n");
116 tdb 1.1
117     # attempt to connect the control channel
118     my($csock) = new IO::Socket::INET(
119     PeerAddr => $addr,
120     PeerPort => $cport,
121     Proto => 'tcp'
122     );
123    
124     # if socket isn't defined connection failed
125     if (!defined $csock) {
126 tdb 1.3 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
127     &error("Please check that there is an i-scream server at this address.\n");
128 tdb 1.1 &wait_then_retry();
129     next;
130     }
131    
132     my($response);
133    
134     # client interface should send it's protocol ID
135     # we know about "PROTOCOL 1.1", and will only accept the same
136     $response = <$csock>;
137 tdb 1.3 &log("CLI sent: $response");
138 tdb 1.1 if ($response && $response ne "PROTOCOL 1.1\n") {
139 tdb 1.3 &error("The i-scream server sent an unexpected protocol ID: $response\n");
140 tdb 1.1 close($csock);
141     &wait_then_retry();
142     next;
143     }
144    
145     # send our identifier to the client interface
146     print $csock "rrdgraphing\n";
147 tdb 1.3 &log("we sent: rrdgraphing\n");
148 tdb 1.1 $response = <$csock>;
149 tdb 1.3 &log("CLI sent: $response");
150 tdb 1.1 if ($response && $response ne "OK\n") {
151 tdb 1.3 &error("Received unexpected response: $response\n");
152 tdb 1.1 close($csock);
153     &wait_then_retry();
154     next;
155     }
156    
157     # tell the client interface we'd like to start the data channel
158     print $csock "STARTDATA\n";
159 tdb 1.3 &log("we sent: STARTDATA\n");
160    
161 tdb 1.1 # the response should be the socket to connect the data channel to
162     $response = <$csock>;
163 tdb 1.3 &log("CLI sent: $response");
164 tdb 1.1 chomp $response;
165    
166     my($dport) = $response;
167 tdb 1.3 &log("Connecting data channel to port $dport on $addr...\n");
168 tdb 1.1
169     # attempt to connect the data channel
170     my($dsock) = new IO::Socket::INET(
171     PeerAddr => $addr,
172     PeerPort => $dport,
173     Proto => 'tcp'
174     ) or die "arse?";
175    
176     # if socket isn't defined connection failed
177     if (!defined $dsock) {
178 tdb 1.3 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
179     &error("Failure in communications.\n");
180 tdb 1.1 close($csock);
181     &wait_then_retry();
182     next;
183     }
184    
185     # the data channel should now be sending us data!
186    
187     # call sub to process data being received over the data channel
188     &processdata($dsock);
189    
190     # data processing has stopped, close sockets
191     close($csock);
192     close($dsock);
193    
194     # wait before retrying
195     &wait_then_retry();
196     }
197    
198     # we'll never reach here... unless 1 becomes false for some reason ;)
199     exit 0;
200    
201    
202     #
203     # wait for a while before retrying
204     #
205     sub wait_then_retry() {
206 tdb 1.3 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
207 tdb 1.1 sleep $retry_wait;
208     }
209    
210     #
211     # Given the socket of the data channel will process all
212     # the incoming XML data, creating and updating the appropriate
213     # database files.
214     #
215     # $dsock = socket connected to the data channel
216     #
217     sub processdata() {
218     # the socket connected to the data channel
219     my($dsock) = @_;
220     # save us recreating this variable each time we loop
221     my($xml);
222    
223     while(1) {
224     # read data
225     $xml = <$dsock>;
226    
227     # something odd has happened
228     last if not defined $xml;
229    
230     # attempt to parse the data
231     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
232     if($err) {
233 tdb 1.3 &error("Skipped, XML did not parse: $xml");
234 tdb 1.1 next;
235     }
236    
237     # standard data packet
238     if($xmlhash{"packet.attributes.type"} eq "data") {
239     my($machine) = $xmlhash{"packet.attributes.machine_name"};
240     my($date) = $xmlhash{"packet.attributes.date"};
241    
242     # make directory for machine
243     if(! -d "$rrddir/$machine") {
244     # not sure on this umask, but it seems to work?
245     mkdir "$rrddir/$machine", 0777;
246 tdb 1.3 &log("created directory $rrddir/$machine\n");
247 tdb 1.1 }
248    
249     my(@data);
250    
251     # cpu
252     @data = ( "packet.cpu.idle:idle:GAUGE",
253     "packet.cpu.user:user:GAUGE",
254     "packet.cpu.kernel:kernel:GAUGE",
255     "packet.cpu.swap:swap:GAUGE",
256     "packet.cpu.iowait:iowait:GAUGE",
257     );
258     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
259    
260 tdb 1.9 # uptime
261     @data = ( "packet.os.uptime:uptime:GAUGE" );
262     &updaterrd($machine, "uptime", $date, $rrdstep, \%xmlhash, @data);
263    
264 tdb 1.1 # mem
265     @data = ( "packet.memory.free:free:GAUGE",
266     "packet.memory.total:total:GAUGE",
267 tdb 1.7 "packet.memory.cache:cache:GAUGE",
268 tdb 1.1 );
269     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
270    
271     # load
272     @data = ( "packet.load.load1:load1:GAUGE",
273     "packet.load.load5:load5:GAUGE",
274     "packet.load.load15:load15:GAUGE",
275     );
276     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
277    
278     # processes
279     @data = ( "packet.processes.cpu:cpu:GAUGE",
280     "packet.processes.sleeping:sleeping:GAUGE",
281     "packet.processes.stopped:stopped:GAUGE",
282     "packet.processes.total:total:GAUGE",
283     "packet.processes.zombie:zombie:GAUGE",
284     );
285     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
286    
287     # swap
288     @data = ( "packet.swap.free:free:GAUGE",
289     "packet.swap.total:total:GAUGE",
290     );
291     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
292    
293     # users
294     @data = ( "packet.users.count:count:GAUGE",
295     );
296     &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
297 tdb 1.5
298     # paging
299 tdb 1.6 @data = ( "packet.pages.pageins:pageins:GAUGE",
300     "packet.pages.pageouts:pageouts:GAUGE",
301 tdb 1.5 );
302     &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
303    
304 tdb 1.1 # disk
305     my($i) = 0;
306     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
307     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
308     $mount =~ s/_/$hex_underscore/g;
309     $mount =~ s/\//$hex_slash/g;
310 tdb 1.9 $mount =~ s/\\/$hex_bslash/g;
311     $mount =~ s/ /$hex_space/g;
312     $mount =~ s/:/$hex_colon/g;
313 tdb 1.6 @data = ( "packet.disk.p$i.attributes.total:total:GAUGE",
314 tdb 1.1 "packet.disk.p$i.attributes.used:used:GAUGE",
315 tdb 1.5 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
316     "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
317 tdb 1.1 );
318     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
319     ++$i;
320 tdb 1.6 }
321    
322     # diskio
323     $i = 0;
324     while(defined $xmlhash{"packet.diskio.p$i.attributes.name"}) {
325     my($name) = $xmlhash{"packet.diskio.p$i.attributes.name"};
326 tdb 1.9 $name =~ s/_/$hex_underscore/g;
327     $name =~ s/\//$hex_slash/g;
328     $name =~ s/\\/$hex_bslash/g;
329     $name =~ s/ /$hex_space/g;
330     $name =~ s/:/$hex_colon/g;
331 tdb 1.6 @data = ( "packet.diskio.p$i.attributes.rbytes:rbytes:GAUGE",
332     "packet.diskio.p$i.attributes.wbytes:wbytes:GAUGE",
333     );
334     &updaterrd($machine, "diskio-$name", $date, $rrdstep, \%xmlhash, @data);
335     ++$i
336     }
337    
338     # net
339     $i = 0;
340     while(defined $xmlhash{"packet.net.p$i.attributes.name"}) {
341     my($name) = $xmlhash{"packet.net.p$i.attributes.name"};
342 tdb 1.9 $name =~ s/_/$hex_underscore/g;
343     $name =~ s/\//$hex_slash/g;
344     $name =~ s/\\/$hex_bslash/g;
345     $name =~ s/ /$hex_space/g;
346     $name =~ s/:/$hex_colon/g;
347 tdb 1.11 $name =~ s/\(/$hex_lbracket/g;
348     $name =~ s/\)/$hex_rbracket/g;
349 tdb 1.10 $name =~ s/\+/$hex_plus/g;
350 tdb 1.12 $name =~ s/#/$hex_hash/g;
351 tdb 1.6 @data = ( "packet.net.p$i.attributes.rx:rx:GAUGE",
352     "packet.net.p$i.attributes.tx:tx:GAUGE",
353     );
354 tdb 1.7 &updaterrd($machine, "net-$name", $date, $rrdstep, \%xmlhash, @data);
355     ++$i
356     }
357    
358     # mailq
359     $i = 0;
360     while(defined $xmlhash{"packet.mailq.p$i.attributes.name"}) {
361     my($name) = $xmlhash{"packet.mailq.p$i.attributes.name"};
362 tdb 1.9 $name =~ s/\s+//g;
363 tdb 1.7 @data = ( "packet.mailq.p$i.attributes.size:size:GAUGE",
364     );
365     &updaterrd($machine, "mailq-$name", $date, $rrdstep, \%xmlhash, @data);
366 tdb 1.6 ++$i
367 tdb 1.1 }
368     }
369    
370     # queue statistics packet
371     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
372     # psuedo machine for internal server stuff
373     my($machine) = "i-scream-server";
374     # make directory
375     if(! -d "$rrddir/$machine") {
376     # not sure on this umask, but it seems to work?
377     mkdir "$rrddir/$machine", 0777;
378 tdb 1.3 &log("created directory $rrddir/$machine\n");
379 tdb 1.1 }
380     my($hash) = $xmlhash{"packet.attributes.hashCode"};
381     my($date) = $xmlhash{"packet.attributes.date"};
382     my($name) = $xmlhash{"packet.attributes.name"};
383     # take a look to see if we have a shutdown packet...
384     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
385     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
386 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
387 tdb 1.1 next;
388     }
389     # look through to see how many internal queues we have
390     my($i) = 0;
391     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
392     # see if the queue has been removed
393     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
394     # delete the queues rrd
395     unlink "$rrddir/$machine/$hash\_$i.rrd";
396 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
397 tdb 1.1 # are there any other rrd's left on this queue? if not, cleanup.
398     # get a list of any that may be still there..
399     opendir(DIR, "$rrddir/$machine");
400     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
401     closedir DIR;
402     # count them (+1 because an empty array is size -1)
403     my($rrdcount) = $#rrdcountfiles + 1;
404     if($rrdcount == 0) {
405     # clean up the def file and any images
406     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
407 tdb 1.3 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
408 tdb 1.1 }
409     ++$i;
410     next;
411     }
412     # the &updaterrd will also do this check, but we want
413     # to write our def file out first
414     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
415     open(DEF, ">$rrddir/$machine/$hash.def");
416     print DEF $name;
417     close DEF;
418 tdb 1.3 &log("created $rrddir/$machine/$hash.def\n");
419 tdb 1.1 }
420     my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
421     "packet.queue.attributes.total:total:COUNTER",
422     );
423     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
424     ++$i;
425     }
426     }
427     else {
428 tdb 1.3 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
429 tdb 1.1 }
430     }
431    
432     # we'll now return from this sub and reconnect
433 tdb 1.3 &error("Data channel socket gave no data, bailing out...\n");
434 tdb 1.1 }
435    
436     #
437     # sub to update an rrd file
438     #
439     # $machine = name of the machine
440     # (eg. kernow.ukc.ac.uk)
441     # $type = the type of graph for the machine
442     # (eg. cpu)
443     # $date = the date of the item we want to add
444     # (in seconds since the epoch)
445     # $step = the interval at which the database steps
446     # used when we create a new rrd
447     # $xmlref = reference to the xml data packet
448     # @data = array containing data items to add
449     # (eg. "packet.cpu.user:user:GAUGE")
450     #
451     sub updaterrd() {
452     my($machine, $type, $date, $step, $xmlref, @data) = @_;
453     # get hold of the xmlhash we have a reference to
454     my(%xmlhash) = %$xmlref;
455     # check if we need to create a new rrd
456     if( ! -f "$rrddir/$machine/$type.rrd") {
457     my(@createdata);
458     # pull the details out of the data we've been given
459     foreach my $dataitem (@data) {
460     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
461     push @createdata, "$1:$2";
462     }
463     }
464     # call the &makerrd to actually create the rrd
465 tdb 1.3 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
466 tdb 1.1 &makerrd($machine, $type, $date, $step, @createdata);
467     }
468     # get the details out of the data we've been given
469     my($updateparams) = "$date";
470     foreach my $dataitem (@data) {
471     if($dataitem =~ /^(\S+):\S+:\S+$/) {
472     # pull the values straight out of the xmlhash
473     my($value) = $xmlhash{$1};
474     # if it's undefined we'll set it to 0
475     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
476     $value = "0" if not defined $value;
477     $updateparams .= ":$value";
478     }
479     }
480     # perform the update
481     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
482 tdb 1.3 &log("updating $rrddir/$machine/$type.rrd\n");
483 tdb 1.1 my($err) = RRDs::error;
484 tdb 1.3 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
485 tdb 1.1 }
486    
487     #
488     # sub to create a new rrd file
489     #
490     # $machine = name of the machine
491     # (eg. kernow.ukc.ac.uk)
492     # $type = the type of graph for the machine
493     # (eg. cpu)
494     # $start = the date of the first item we want to add
495     # (in seconds since the epoch)
496     # $step = the interval at which the database steps
497     # @data = the data items we want to put in the rrd
498     # in the form: $dsname:dstype
499     # (eg. "size:GAUGE")
500     #
501     sub makerrd() {
502     my($machine, $type, $start, $step, @data) = @_;
503     # check if directory exists for rrd
504     if(! -d "$rrddir/$machine") {
505     # not sure on this umask, but it seems to work?
506     mkdir "$rrddir/$machine", 0777;
507 tdb 1.3 &log("created directory $rrddir/$machine\n");
508 tdb 1.1 }
509     my(@rrdcmd);
510     # we'll want to add our first data item at $start,
511     # so we start our rrd $step before that.
512     $start -= $step;
513     push @rrdcmd, "$rrddir/$machine/$type.rrd";
514     push @rrdcmd, "--start=$start";
515     push @rrdcmd, "--step=$step";
516     foreach my $dataitem (@data) {
517     # dataitem should be: "dsname:dstype"
518     if($dataitem =~ /^(\S+):(\S+)$/) {
519     push @rrdcmd, "DS:$1:$2:600:U:U";
520     }
521     }
522     push @rrdcmd, (
523     # 3h in 15s samples
524     "RRA:AVERAGE:0.5:1:720",
525     "RRA:MAX:0.5:1:720",
526     # 1d in 2m samples
527     "RRA:AVERAGE:0.5:8:720",
528     "RRA:MAX:0.5:8:720",
529     # 1w in 15m samples
530     "RRA:AVERAGE:0.5:60:672",
531     "RRA:MAX:0.5:60:672",
532     # 1m in 1hr samples
533     "RRA:AVERAGE:0.5:240:744",
534     "RRA:MAX:0.5:240:744",
535     # 1y in 12hr samples
536     "RRA:AVERAGE:0.5:2880:730",
537     "RRA:MAX:0.5:2880:730",
538     );
539     RRDs::create (@rrdcmd);
540     my($err) = RRDs::error;
541 tdb 1.3 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
542     }
543    
544     # prints out usage information then exits
545     sub usage() {
546     print "Usage: watch.pl [options] i-scream_client_interface port\n";
547     print "Options\n";
548     print " -c config Specifies the configuration file\n";
549     print " default: rrdgraphing.conf\n";
550     print " -v Be verbose about what's happening\n";
551     print " -q Be quiet, even supress errors\n";
552     print " -V Print version number\n";
553     print " -h Prints this help page\n";
554     exit(1);
555     }
556    
557     # prints a log message if verbose is turned on
558     sub log() {
559     my($msg) = @_;
560     print $msg if $verbose;
561     }
562    
563     # prints an error message unless quiet is turned on
564     sub error() {
565     my($msg) = @_;
566     print STDERR $msg unless $quiet;
567 tdb 1.1 }