ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.7
Committed: Thu Mar 4 11:27:30 2004 UTC (20 years, 9 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.6: +19 -4 lines
Log Message:
Latest versions of the rrdgraphing code. Some graphs now use areas, others
use stacks. Added cache memory, and our mail queue graphs.

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 #
4     # i-scream central monitoring system
5 tdb 1.4 # http://www.i-scream.org.uk
6 tdb 1.2 # Copyright (C) 2000-2002 i-scream
7     #
8     # This program is free software; you can redistribute it and/or
9     # modify it under the terms of the GNU General Public License
10     # as published by the Free Software Foundation; either version 2
11     # of the License, or (at your option) any later version.
12     #
13     # This program is distributed in the hope that it will be useful,
14     # but WITHOUT ANY WARRANTY; without even the implied warranty of
15     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     # GNU General Public License for more details.
17     #
18     # You should have received a copy of the GNU General Public License
19     # along with this program; if not, write to the Free Software
20     # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21     #
22    
23 tdb 1.1 # -----------------------------------------------------------
24     # i-scream graph generation scripts
25     # http://www.i-scream.org.uk
26     #
27     # Generates rrd databases for i-scream data by connecting to
28     # the i-scream server and collecting data.
29     #
30     # $Author: tdb $
31 tdb 1.7 # $Id: watch.pl,v 1.6 2003/03/10 11:34:43 tdb Exp $
32 tdb 1.1 #------------------------------------------------------------
33    
34 tdb 1.7 BEGIN {
35     push (@INC, "/usr/local/packages/rrdtool/lib/perl5/site_perl/5.8.2/sun4-solaris");
36     }
37    
38     my($version) = '$Id: watch.pl,v 1.6 2003/03/10 11:34:43 tdb Exp $';
39 tdb 1.1
40     $| = 1;
41    
42     use strict;
43 tdb 1.3 use Getopt::Std;
44     use IO::Socket;
45 tdb 1.1 use iscream::XMLParser;
46     use RRDs;
47    
48 tdb 1.3 # define variables that will be read from the config
49     # nb. keep this insync with the config file!
50     use vars qw{
51     $imgdir $rrddir
52     $maxrrdage $maximgage $deleterrds $deleteimgs
53     $hex_slash $hex_underscore
54     $rrdstep $retry_wait
55     $verbose $quiet
56     };
57    
58     # default locate of the config file
59     my($configfile) = "rrdgraphing.conf";
60    
61     # check for command line arguments
62     my(%opts);
63     my($ret) = getopts('hvqVc:', \%opts);
64    
65     # if invalid argument given, $ret will not be 1
66     &usage() if $ret != 1;
67    
68     # first process the arguments which might mean we exit now
69    
70     # -h is usage
71     if($opts{h}) {
72     &usage();
73     }
74     # -V is version
75     if($opts{V}) {
76     print "watch.pl version: $version\n";
77     exit(1);
78     }
79 tdb 1.1
80 tdb 1.3 # Then try getting the config
81 tdb 1.1
82 tdb 1.3 # -c specifies the config file location
83     if($opts{c}) {
84     $configfile = $opts{c};
85     }
86     # suck in the config
87     &log("reading config from $configfile\n");
88     do $configfile;
89    
90     # Then any options we might want to override the config with
91    
92     # -v is verbose
93     if($opts{v}) {
94     $verbose = $opts{v};
95     }
96     # -q is verbose
97     if($opts{q}) {
98     $quiet = $opts{q};
99     # if we're meant to be quiet, we can hardly be verbose!
100     # $verbose = 0;
101     }
102 tdb 1.1
103 tdb 1.3 # Finally check for required arguments
104    
105     # check we still have two arguments left
106 tdb 1.1 if (@ARGV != 2) {
107 tdb 1.3 &usage();
108 tdb 1.1 }
109    
110     # user supplied client interface server and port
111     my($addr) = $ARGV[0];
112     my($cport) = $ARGV[1];
113    
114 tdb 1.3
115     # Main program loop
116 tdb 1.1 while(1) {
117    
118 tdb 1.3 &log("Connecting control channel to port $cport on $addr...\n");
119 tdb 1.1
120     # attempt to connect the control channel
121     my($csock) = new IO::Socket::INET(
122     PeerAddr => $addr,
123     PeerPort => $cport,
124     Proto => 'tcp'
125     );
126    
127     # if socket isn't defined connection failed
128     if (!defined $csock) {
129 tdb 1.3 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
130     &error("Please check that there is an i-scream server at this address.\n");
131 tdb 1.1 &wait_then_retry();
132     next;
133     }
134    
135     my($response);
136    
137     # client interface should send it's protocol ID
138     # we know about "PROTOCOL 1.1", and will only accept the same
139     $response = <$csock>;
140 tdb 1.3 &log("CLI sent: $response");
141 tdb 1.1 if ($response && $response ne "PROTOCOL 1.1\n") {
142 tdb 1.3 &error("The i-scream server sent an unexpected protocol ID: $response\n");
143 tdb 1.1 close($csock);
144     &wait_then_retry();
145     next;
146     }
147    
148     # send our identifier to the client interface
149     print $csock "rrdgraphing\n";
150 tdb 1.3 &log("we sent: rrdgraphing\n");
151 tdb 1.1 $response = <$csock>;
152 tdb 1.3 &log("CLI sent: $response");
153 tdb 1.1 if ($response && $response ne "OK\n") {
154 tdb 1.3 &error("Received unexpected response: $response\n");
155 tdb 1.1 close($csock);
156     &wait_then_retry();
157     next;
158     }
159    
160     # tell the client interface we'd like to start the data channel
161     print $csock "STARTDATA\n";
162 tdb 1.3 &log("we sent: STARTDATA\n");
163    
164 tdb 1.1 # the response should be the socket to connect the data channel to
165     $response = <$csock>;
166 tdb 1.3 &log("CLI sent: $response");
167 tdb 1.1 chomp $response;
168    
169     my($dport) = $response;
170 tdb 1.3 &log("Connecting data channel to port $dport on $addr...\n");
171 tdb 1.1
172     # attempt to connect the data channel
173     my($dsock) = new IO::Socket::INET(
174     PeerAddr => $addr,
175     PeerPort => $dport,
176     Proto => 'tcp'
177     ) or die "arse?";
178    
179     # if socket isn't defined connection failed
180     if (!defined $dsock) {
181 tdb 1.3 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
182     &error("Failure in communications.\n");
183 tdb 1.1 close($csock);
184     &wait_then_retry();
185     next;
186     }
187    
188     # the data channel should now be sending us data!
189    
190     # call sub to process data being received over the data channel
191     &processdata($dsock);
192    
193     # data processing has stopped, close sockets
194     close($csock);
195     close($dsock);
196    
197     # wait before retrying
198     &wait_then_retry();
199     }
200    
201     # we'll never reach here... unless 1 becomes false for some reason ;)
202     exit 0;
203    
204    
205     #
206     # wait for a while before retrying
207     #
208     sub wait_then_retry() {
209 tdb 1.3 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
210 tdb 1.1 sleep $retry_wait;
211     }
212    
213     #
214     # Given the socket of the data channel will process all
215     # the incoming XML data, creating and updating the appropriate
216     # database files.
217     #
218     # $dsock = socket connected to the data channel
219     #
220     sub processdata() {
221     # the socket connected to the data channel
222     my($dsock) = @_;
223     # save us recreating this variable each time we loop
224     my($xml);
225    
226     while(1) {
227     # read data
228     $xml = <$dsock>;
229    
230     # something odd has happened
231     last if not defined $xml;
232    
233     # attempt to parse the data
234     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
235     if($err) {
236 tdb 1.3 &error("Skipped, XML did not parse: $xml");
237 tdb 1.1 next;
238     }
239    
240     # standard data packet
241     if($xmlhash{"packet.attributes.type"} eq "data") {
242     my($machine) = $xmlhash{"packet.attributes.machine_name"};
243     my($date) = $xmlhash{"packet.attributes.date"};
244    
245     # make directory for machine
246     if(! -d "$rrddir/$machine") {
247     # not sure on this umask, but it seems to work?
248     mkdir "$rrddir/$machine", 0777;
249 tdb 1.3 &log("created directory $rrddir/$machine\n");
250 tdb 1.1 }
251    
252     my(@data);
253    
254     # cpu
255     @data = ( "packet.cpu.idle:idle:GAUGE",
256     "packet.cpu.user:user:GAUGE",
257     "packet.cpu.kernel:kernel:GAUGE",
258     "packet.cpu.swap:swap:GAUGE",
259     "packet.cpu.iowait:iowait:GAUGE",
260     );
261     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
262    
263     # mem
264     @data = ( "packet.memory.free:free:GAUGE",
265     "packet.memory.total:total:GAUGE",
266 tdb 1.7 "packet.memory.cache:cache:GAUGE",
267 tdb 1.1 );
268     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
269    
270     # load
271     @data = ( "packet.load.load1:load1:GAUGE",
272     "packet.load.load5:load5:GAUGE",
273     "packet.load.load15:load15:GAUGE",
274     );
275     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
276    
277     # processes
278     @data = ( "packet.processes.cpu:cpu:GAUGE",
279     "packet.processes.sleeping:sleeping:GAUGE",
280     "packet.processes.stopped:stopped:GAUGE",
281     "packet.processes.total:total:GAUGE",
282     "packet.processes.zombie:zombie:GAUGE",
283     );
284     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
285    
286     # swap
287     @data = ( "packet.swap.free:free:GAUGE",
288     "packet.swap.total:total:GAUGE",
289     );
290     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
291    
292     # users
293     @data = ( "packet.users.count:count:GAUGE",
294     );
295     &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
296 tdb 1.5
297     # paging
298 tdb 1.6 @data = ( "packet.pages.pageins:pageins:GAUGE",
299     "packet.pages.pageouts:pageouts:GAUGE",
300 tdb 1.5 );
301     &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
302    
303 tdb 1.1 # disk
304     my($i) = 0;
305     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
306     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
307     $mount =~ s/_/$hex_underscore/g;
308     $mount =~ s/\//$hex_slash/g;
309 tdb 1.6 @data = ( "packet.disk.p$i.attributes.total:total:GAUGE",
310 tdb 1.1 "packet.disk.p$i.attributes.used:used:GAUGE",
311 tdb 1.5 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
312     "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
313 tdb 1.1 );
314     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
315     ++$i;
316 tdb 1.6 }
317    
318     # diskio
319     $i = 0;
320     while(defined $xmlhash{"packet.diskio.p$i.attributes.name"}) {
321     my($name) = $xmlhash{"packet.diskio.p$i.attributes.name"};
322     @data = ( "packet.diskio.p$i.attributes.rbytes:rbytes:GAUGE",
323     "packet.diskio.p$i.attributes.wbytes:wbytes:GAUGE",
324     );
325     &updaterrd($machine, "diskio-$name", $date, $rrdstep, \%xmlhash, @data);
326     ++$i
327     }
328    
329     # net
330     $i = 0;
331     while(defined $xmlhash{"packet.net.p$i.attributes.name"}) {
332     my($name) = $xmlhash{"packet.net.p$i.attributes.name"};
333     @data = ( "packet.net.p$i.attributes.rx:rx:GAUGE",
334     "packet.net.p$i.attributes.tx:tx:GAUGE",
335     );
336 tdb 1.7 &updaterrd($machine, "net-$name", $date, $rrdstep, \%xmlhash, @data);
337     ++$i
338     }
339    
340     # mailq
341     $i = 0;
342     while(defined $xmlhash{"packet.mailq.p$i.attributes.name"}) {
343     my($name) = $xmlhash{"packet.mailq.p$i.attributes.name"};
344     $name =~ s/\s+//g;
345     @data = ( "packet.mailq.p$i.attributes.size:size:GAUGE",
346     );
347     &updaterrd($machine, "mailq-$name", $date, $rrdstep, \%xmlhash, @data);
348 tdb 1.6 ++$i
349 tdb 1.1 }
350     }
351    
352     # queue statistics packet
353     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
354     # psuedo machine for internal server stuff
355     my($machine) = "i-scream-server";
356     # make directory
357     if(! -d "$rrddir/$machine") {
358     # not sure on this umask, but it seems to work?
359     mkdir "$rrddir/$machine", 0777;
360 tdb 1.3 &log("created directory $rrddir/$machine\n");
361 tdb 1.1 }
362     my($hash) = $xmlhash{"packet.attributes.hashCode"};
363     my($date) = $xmlhash{"packet.attributes.date"};
364     my($name) = $xmlhash{"packet.attributes.name"};
365     # take a look to see if we have a shutdown packet...
366     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
367     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
368 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
369 tdb 1.1 next;
370     }
371     # look through to see how many internal queues we have
372     my($i) = 0;
373     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
374     # see if the queue has been removed
375     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
376     # delete the queues rrd
377     unlink "$rrddir/$machine/$hash\_$i.rrd";
378 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
379 tdb 1.1 # are there any other rrd's left on this queue? if not, cleanup.
380     # get a list of any that may be still there..
381     opendir(DIR, "$rrddir/$machine");
382     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
383     closedir DIR;
384     # count them (+1 because an empty array is size -1)
385     my($rrdcount) = $#rrdcountfiles + 1;
386     if($rrdcount == 0) {
387     # clean up the def file and any images
388     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
389 tdb 1.3 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
390 tdb 1.1 }
391     ++$i;
392     next;
393     }
394     # the &updaterrd will also do this check, but we want
395     # to write our def file out first
396     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
397     open(DEF, ">$rrddir/$machine/$hash.def");
398     print DEF $name;
399     close DEF;
400 tdb 1.3 &log("created $rrddir/$machine/$hash.def\n");
401 tdb 1.1 }
402     my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
403     "packet.queue.attributes.total:total:COUNTER",
404     );
405     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
406     ++$i;
407     }
408     }
409     else {
410 tdb 1.3 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
411 tdb 1.1 }
412     }
413    
414     # we'll now return from this sub and reconnect
415 tdb 1.3 &error("Data channel socket gave no data, bailing out...\n");
416 tdb 1.1 }
417    
418     #
419     # sub to update an rrd file
420     #
421     # $machine = name of the machine
422     # (eg. kernow.ukc.ac.uk)
423     # $type = the type of graph for the machine
424     # (eg. cpu)
425     # $date = the date of the item we want to add
426     # (in seconds since the epoch)
427     # $step = the interval at which the database steps
428     # used when we create a new rrd
429     # $xmlref = reference to the xml data packet
430     # @data = array containing data items to add
431     # (eg. "packet.cpu.user:user:GAUGE")
432     #
433     sub updaterrd() {
434     my($machine, $type, $date, $step, $xmlref, @data) = @_;
435     # get hold of the xmlhash we have a reference to
436     my(%xmlhash) = %$xmlref;
437     # check if we need to create a new rrd
438     if( ! -f "$rrddir/$machine/$type.rrd") {
439     my(@createdata);
440     # pull the details out of the data we've been given
441     foreach my $dataitem (@data) {
442     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
443     push @createdata, "$1:$2";
444     }
445     }
446     # call the &makerrd to actually create the rrd
447 tdb 1.3 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
448 tdb 1.1 &makerrd($machine, $type, $date, $step, @createdata);
449     }
450     # get the details out of the data we've been given
451     my($updateparams) = "$date";
452     foreach my $dataitem (@data) {
453     if($dataitem =~ /^(\S+):\S+:\S+$/) {
454     # pull the values straight out of the xmlhash
455     my($value) = $xmlhash{$1};
456     # if it's undefined we'll set it to 0
457     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
458     $value = "0" if not defined $value;
459     $updateparams .= ":$value";
460     }
461     }
462     # perform the update
463     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
464 tdb 1.3 &log("updating $rrddir/$machine/$type.rrd\n");
465 tdb 1.1 my($err) = RRDs::error;
466 tdb 1.3 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
467 tdb 1.1 }
468    
469     #
470     # sub to create a new rrd file
471     #
472     # $machine = name of the machine
473     # (eg. kernow.ukc.ac.uk)
474     # $type = the type of graph for the machine
475     # (eg. cpu)
476     # $start = the date of the first item we want to add
477     # (in seconds since the epoch)
478     # $step = the interval at which the database steps
479     # @data = the data items we want to put in the rrd
480     # in the form: $dsname:dstype
481     # (eg. "size:GAUGE")
482     #
483     sub makerrd() {
484     my($machine, $type, $start, $step, @data) = @_;
485     # check if directory exists for rrd
486     if(! -d "$rrddir/$machine") {
487     # not sure on this umask, but it seems to work?
488     mkdir "$rrddir/$machine", 0777;
489 tdb 1.3 &log("created directory $rrddir/$machine\n");
490 tdb 1.1 }
491     my(@rrdcmd);
492     # we'll want to add our first data item at $start,
493     # so we start our rrd $step before that.
494     $start -= $step;
495     push @rrdcmd, "$rrddir/$machine/$type.rrd";
496     push @rrdcmd, "--start=$start";
497     push @rrdcmd, "--step=$step";
498     foreach my $dataitem (@data) {
499     # dataitem should be: "dsname:dstype"
500     if($dataitem =~ /^(\S+):(\S+)$/) {
501     push @rrdcmd, "DS:$1:$2:600:U:U";
502     }
503     }
504     push @rrdcmd, (
505     # 3h in 15s samples
506     "RRA:AVERAGE:0.5:1:720",
507     "RRA:MAX:0.5:1:720",
508     # 1d in 2m samples
509     "RRA:AVERAGE:0.5:8:720",
510     "RRA:MAX:0.5:8:720",
511     # 1w in 15m samples
512     "RRA:AVERAGE:0.5:60:672",
513     "RRA:MAX:0.5:60:672",
514     # 1m in 1hr samples
515     "RRA:AVERAGE:0.5:240:744",
516     "RRA:MAX:0.5:240:744",
517     # 1y in 12hr samples
518     "RRA:AVERAGE:0.5:2880:730",
519     "RRA:MAX:0.5:2880:730",
520     );
521     RRDs::create (@rrdcmd);
522     my($err) = RRDs::error;
523 tdb 1.3 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
524     }
525    
526     # prints out usage information then exits
527     sub usage() {
528     print "Usage: watch.pl [options] i-scream_client_interface port\n";
529     print "Options\n";
530     print " -c config Specifies the configuration file\n";
531     print " default: rrdgraphing.conf\n";
532     print " -v Be verbose about what's happening\n";
533     print " -q Be quiet, even supress errors\n";
534     print " -V Print version number\n";
535     print " -h Prints this help page\n";
536     exit(1);
537     }
538    
539     # prints a log message if verbose is turned on
540     sub log() {
541     my($msg) = @_;
542     print $msg if $verbose;
543     }
544    
545     # prints an error message unless quiet is turned on
546     sub error() {
547     my($msg) = @_;
548     print STDERR $msg unless $quiet;
549 tdb 1.1 }