ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.3
Committed: Tue May 21 15:01:43 2002 UTC (22 years, 6 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.2: +116 -41 lines
Log Message:
Changed to have a shared config file. Also parses command line options to
make it more verbose or quiet.

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 #
4     # i-scream central monitoring system
5     # Copyright (C) 2000-2002 i-scream
6     #
7     # This program is free software; you can redistribute it and/or
8     # modify it under the terms of the GNU General Public License
9     # as published by the Free Software Foundation; either version 2
10     # of the License, or (at your option) any later version.
11     #
12     # This program is distributed in the hope that it will be useful,
13     # but WITHOUT ANY WARRANTY; without even the implied warranty of
14     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     # GNU General Public License for more details.
16     #
17     # You should have received a copy of the GNU General Public License
18     # along with this program; if not, write to the Free Software
19     # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20     #
21    
22 tdb 1.1 # -----------------------------------------------------------
23     # i-scream graph generation scripts
24     # http://www.i-scream.org.uk
25     #
26     # Generates rrd databases for i-scream data by connecting to
27     # the i-scream server and collecting data.
28     #
29     # $Author: tdb $
30 tdb 1.3 # $Id: watch.pl,v 1.2 2002/05/18 18:15:59 tdb Exp $
31 tdb 1.1 #------------------------------------------------------------
32    
33 tdb 1.3 my($version) = '$Id$';
34 tdb 1.1
35     $| = 1;
36    
37     use strict;
38 tdb 1.3 use Getopt::Std;
39     use IO::Socket;
40 tdb 1.1 use iscream::XMLParser;
41     use RRDs;
42    
43 tdb 1.3 # define variables that will be read from the config
44     # nb. keep this insync with the config file!
45     use vars qw{
46     $imgdir $rrddir
47     $maxrrdage $maximgage $deleterrds $deleteimgs
48     $hex_slash $hex_underscore
49     $rrdstep $retry_wait
50     $verbose $quiet
51     };
52    
53     # default locate of the config file
54     my($configfile) = "rrdgraphing.conf";
55    
56     # check for command line arguments
57     my(%opts);
58     my($ret) = getopts('hvqVc:', \%opts);
59    
60     # if invalid argument given, $ret will not be 1
61     &usage() if $ret != 1;
62    
63     # first process the arguments which might mean we exit now
64    
65     # -h is usage
66     if($opts{h}) {
67     &usage();
68     }
69     # -V is version
70     if($opts{V}) {
71     print "watch.pl version: $version\n";
72     exit(1);
73     }
74 tdb 1.1
75 tdb 1.3 # Then try getting the config
76 tdb 1.1
77 tdb 1.3 # -c specifies the config file location
78     if($opts{c}) {
79     $configfile = $opts{c};
80     }
81     # suck in the config
82     &log("reading config from $configfile\n");
83     do $configfile;
84    
85     # Then any options we might want to override the config with
86    
87     # -v is verbose
88     if($opts{v}) {
89     $verbose = $opts{v};
90     }
91     # -q is verbose
92     if($opts{q}) {
93     $quiet = $opts{q};
94     # if we're meant to be quiet, we can hardly be verbose!
95     # $verbose = 0;
96     }
97 tdb 1.1
98 tdb 1.3 # Finally check for required arguments
99    
100     # check we still have two arguments left
101 tdb 1.1 if (@ARGV != 2) {
102 tdb 1.3 &usage();
103 tdb 1.1 }
104    
105     # user supplied client interface server and port
106     my($addr) = $ARGV[0];
107     my($cport) = $ARGV[1];
108    
109 tdb 1.3
110     # Main program loop
111 tdb 1.1 while(1) {
112    
113 tdb 1.3 &log("Connecting control channel to port $cport on $addr...\n");
114 tdb 1.1
115     # attempt to connect the control channel
116     my($csock) = new IO::Socket::INET(
117     PeerAddr => $addr,
118     PeerPort => $cport,
119     Proto => 'tcp'
120     );
121    
122     # if socket isn't defined connection failed
123     if (!defined $csock) {
124 tdb 1.3 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
125     &error("Please check that there is an i-scream server at this address.\n");
126 tdb 1.1 &wait_then_retry();
127     next;
128     }
129    
130     my($response);
131    
132     # client interface should send it's protocol ID
133     # we know about "PROTOCOL 1.1", and will only accept the same
134     $response = <$csock>;
135 tdb 1.3 &log("CLI sent: $response");
136 tdb 1.1 if ($response && $response ne "PROTOCOL 1.1\n") {
137 tdb 1.3 &error("The i-scream server sent an unexpected protocol ID: $response\n");
138 tdb 1.1 close($csock);
139     &wait_then_retry();
140     next;
141     }
142    
143     # send our identifier to the client interface
144     print $csock "rrdgraphing\n";
145 tdb 1.3 &log("we sent: rrdgraphing\n");
146 tdb 1.1 $response = <$csock>;
147 tdb 1.3 &log("CLI sent: $response");
148 tdb 1.1 if ($response && $response ne "OK\n") {
149 tdb 1.3 &error("Received unexpected response: $response\n");
150 tdb 1.1 close($csock);
151     &wait_then_retry();
152     next;
153     }
154    
155     # tell the client interface we'd like to start the data channel
156     print $csock "STARTDATA\n";
157 tdb 1.3 &log("we sent: STARTDATA\n");
158    
159 tdb 1.1 # the response should be the socket to connect the data channel to
160     $response = <$csock>;
161 tdb 1.3 &log("CLI sent: $response");
162 tdb 1.1 chomp $response;
163    
164     my($dport) = $response;
165 tdb 1.3 &log("Connecting data channel to port $dport on $addr...\n");
166 tdb 1.1
167     # attempt to connect the data channel
168     my($dsock) = new IO::Socket::INET(
169     PeerAddr => $addr,
170     PeerPort => $dport,
171     Proto => 'tcp'
172     ) or die "arse?";
173    
174     # if socket isn't defined connection failed
175     if (!defined $dsock) {
176 tdb 1.3 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
177     &error("Failure in communications.\n");
178 tdb 1.1 close($csock);
179     &wait_then_retry();
180     next;
181     }
182    
183     # the data channel should now be sending us data!
184    
185     # call sub to process data being received over the data channel
186     &processdata($dsock);
187    
188     # data processing has stopped, close sockets
189     close($csock);
190     close($dsock);
191    
192     # wait before retrying
193     &wait_then_retry();
194     }
195    
196     # we'll never reach here... unless 1 becomes false for some reason ;)
197     exit 0;
198    
199    
200     #
201     # wait for a while before retrying
202     #
203     sub wait_then_retry() {
204 tdb 1.3 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
205 tdb 1.1 sleep $retry_wait;
206     }
207    
208     #
209     # Given the socket of the data channel will process all
210     # the incoming XML data, creating and updating the appropriate
211     # database files.
212     #
213     # $dsock = socket connected to the data channel
214     #
215     sub processdata() {
216     # the socket connected to the data channel
217     my($dsock) = @_;
218     # save us recreating this variable each time we loop
219     my($xml);
220    
221     while(1) {
222     # read data
223     $xml = <$dsock>;
224    
225     # something odd has happened
226     last if not defined $xml;
227    
228     # attempt to parse the data
229     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
230     if($err) {
231 tdb 1.3 &error("Skipped, XML did not parse: $xml");
232 tdb 1.1 next;
233     }
234    
235     # standard data packet
236     if($xmlhash{"packet.attributes.type"} eq "data") {
237     my($machine) = $xmlhash{"packet.attributes.machine_name"};
238     my($date) = $xmlhash{"packet.attributes.date"};
239    
240     # make directory for machine
241     if(! -d "$rrddir/$machine") {
242     # not sure on this umask, but it seems to work?
243     mkdir "$rrddir/$machine", 0777;
244 tdb 1.3 &log("created directory $rrddir/$machine\n");
245 tdb 1.1 }
246    
247     my(@data);
248    
249     # cpu
250     @data = ( "packet.cpu.idle:idle:GAUGE",
251     "packet.cpu.user:user:GAUGE",
252     "packet.cpu.kernel:kernel:GAUGE",
253     "packet.cpu.swap:swap:GAUGE",
254     "packet.cpu.iowait:iowait:GAUGE",
255     );
256     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
257    
258     # mem
259     @data = ( "packet.memory.free:free:GAUGE",
260     "packet.memory.total:total:GAUGE",
261     );
262     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
263    
264     # load
265     @data = ( "packet.load.load1:load1:GAUGE",
266     "packet.load.load5:load5:GAUGE",
267     "packet.load.load15:load15:GAUGE",
268     );
269     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
270    
271     # processes
272     @data = ( "packet.processes.cpu:cpu:GAUGE",
273     "packet.processes.sleeping:sleeping:GAUGE",
274     "packet.processes.stopped:stopped:GAUGE",
275     "packet.processes.total:total:GAUGE",
276     "packet.processes.zombie:zombie:GAUGE",
277     );
278     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
279    
280     # swap
281     @data = ( "packet.swap.free:free:GAUGE",
282     "packet.swap.total:total:GAUGE",
283     );
284     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
285    
286     # users
287     @data = ( "packet.users.count:count:GAUGE",
288     );
289     &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
290    
291     # disk
292     my($i) = 0;
293     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
294     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
295     $mount =~ s/_/$hex_underscore/g;
296     $mount =~ s/\//$hex_slash/g;
297     @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
298     "packet.disk.p$i.attributes.used:used:GAUGE",
299     );
300     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
301     ++$i;
302     }
303     }
304    
305     # queue statistics packet
306     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
307     # psuedo machine for internal server stuff
308     my($machine) = "i-scream-server";
309     # make directory
310     if(! -d "$rrddir/$machine") {
311     # not sure on this umask, but it seems to work?
312     mkdir "$rrddir/$machine", 0777;
313 tdb 1.3 &log("created directory $rrddir/$machine\n");
314 tdb 1.1 }
315     my($hash) = $xmlhash{"packet.attributes.hashCode"};
316     my($date) = $xmlhash{"packet.attributes.date"};
317     my($name) = $xmlhash{"packet.attributes.name"};
318     # take a look to see if we have a shutdown packet...
319     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
320     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
321 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
322 tdb 1.1 next;
323     }
324     # look through to see how many internal queues we have
325     my($i) = 0;
326     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
327     # see if the queue has been removed
328     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
329     # delete the queues rrd
330     unlink "$rrddir/$machine/$hash\_$i.rrd";
331 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
332 tdb 1.1 # are there any other rrd's left on this queue? if not, cleanup.
333     # get a list of any that may be still there..
334     opendir(DIR, "$rrddir/$machine");
335     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
336     closedir DIR;
337     # count them (+1 because an empty array is size -1)
338     my($rrdcount) = $#rrdcountfiles + 1;
339     if($rrdcount == 0) {
340     # clean up the def file and any images
341     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
342 tdb 1.3 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
343 tdb 1.1 }
344     ++$i;
345     next;
346     }
347     # the &updaterrd will also do this check, but we want
348     # to write our def file out first
349     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
350     open(DEF, ">$rrddir/$machine/$hash.def");
351     print DEF $name;
352     close DEF;
353 tdb 1.3 &log("created $rrddir/$machine/$hash.def\n");
354 tdb 1.1 }
355     my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
356     "packet.queue.attributes.total:total:COUNTER",
357     );
358     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
359     ++$i;
360     }
361     }
362     else {
363 tdb 1.3 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
364 tdb 1.1 }
365     }
366    
367     # we'll now return from this sub and reconnect
368 tdb 1.3 &error("Data channel socket gave no data, bailing out...\n");
369 tdb 1.1 }
370    
371     #
372     # sub to update an rrd file
373     #
374     # $machine = name of the machine
375     # (eg. kernow.ukc.ac.uk)
376     # $type = the type of graph for the machine
377     # (eg. cpu)
378     # $date = the date of the item we want to add
379     # (in seconds since the epoch)
380     # $step = the interval at which the database steps
381     # used when we create a new rrd
382     # $xmlref = reference to the xml data packet
383     # @data = array containing data items to add
384     # (eg. "packet.cpu.user:user:GAUGE")
385     #
386     sub updaterrd() {
387     my($machine, $type, $date, $step, $xmlref, @data) = @_;
388     # get hold of the xmlhash we have a reference to
389     my(%xmlhash) = %$xmlref;
390     # check if we need to create a new rrd
391     if( ! -f "$rrddir/$machine/$type.rrd") {
392     my(@createdata);
393     # pull the details out of the data we've been given
394     foreach my $dataitem (@data) {
395     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
396     push @createdata, "$1:$2";
397     }
398     }
399     # call the &makerrd to actually create the rrd
400 tdb 1.3 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
401 tdb 1.1 &makerrd($machine, $type, $date, $step, @createdata);
402     }
403     # get the details out of the data we've been given
404     my($updateparams) = "$date";
405     foreach my $dataitem (@data) {
406     if($dataitem =~ /^(\S+):\S+:\S+$/) {
407     # pull the values straight out of the xmlhash
408     my($value) = $xmlhash{$1};
409     # if it's undefined we'll set it to 0
410     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
411     $value = "0" if not defined $value;
412     $updateparams .= ":$value";
413     }
414     }
415     # perform the update
416     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
417 tdb 1.3 &log("updating $rrddir/$machine/$type.rrd\n");
418 tdb 1.1 my($err) = RRDs::error;
419 tdb 1.3 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
420 tdb 1.1 }
421    
422     #
423     # sub to create a new rrd file
424     #
425     # $machine = name of the machine
426     # (eg. kernow.ukc.ac.uk)
427     # $type = the type of graph for the machine
428     # (eg. cpu)
429     # $start = the date of the first item we want to add
430     # (in seconds since the epoch)
431     # $step = the interval at which the database steps
432     # @data = the data items we want to put in the rrd
433     # in the form: $dsname:dstype
434     # (eg. "size:GAUGE")
435     #
436     sub makerrd() {
437     my($machine, $type, $start, $step, @data) = @_;
438     # check if directory exists for rrd
439     if(! -d "$rrddir/$machine") {
440     # not sure on this umask, but it seems to work?
441     mkdir "$rrddir/$machine", 0777;
442 tdb 1.3 &log("created directory $rrddir/$machine\n");
443 tdb 1.1 }
444     my(@rrdcmd);
445     # we'll want to add our first data item at $start,
446     # so we start our rrd $step before that.
447     $start -= $step;
448     push @rrdcmd, "$rrddir/$machine/$type.rrd";
449     push @rrdcmd, "--start=$start";
450     push @rrdcmd, "--step=$step";
451     foreach my $dataitem (@data) {
452     # dataitem should be: "dsname:dstype"
453     if($dataitem =~ /^(\S+):(\S+)$/) {
454     push @rrdcmd, "DS:$1:$2:600:U:U";
455     }
456     }
457     push @rrdcmd, (
458     # 3h in 15s samples
459     "RRA:AVERAGE:0.5:1:720",
460     "RRA:MAX:0.5:1:720",
461     # 1d in 2m samples
462     "RRA:AVERAGE:0.5:8:720",
463     "RRA:MAX:0.5:8:720",
464     # 1w in 15m samples
465     "RRA:AVERAGE:0.5:60:672",
466     "RRA:MAX:0.5:60:672",
467     # 1m in 1hr samples
468     "RRA:AVERAGE:0.5:240:744",
469     "RRA:MAX:0.5:240:744",
470     # 1y in 12hr samples
471     "RRA:AVERAGE:0.5:2880:730",
472     "RRA:MAX:0.5:2880:730",
473     );
474     RRDs::create (@rrdcmd);
475     my($err) = RRDs::error;
476 tdb 1.3 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
477     }
478    
479     # prints out usage information then exits
480     sub usage() {
481     print "Usage: watch.pl [options] i-scream_client_interface port\n";
482     print "Options\n";
483     print " -c config Specifies the configuration file\n";
484     print " default: rrdgraphing.conf\n";
485     print " -v Be verbose about what's happening\n";
486     print " -q Be quiet, even supress errors\n";
487     print " -V Print version number\n";
488     print " -h Prints this help page\n";
489     exit(1);
490     }
491    
492     # prints a log message if verbose is turned on
493     sub log() {
494     my($msg) = @_;
495     print $msg if $verbose;
496     }
497    
498     # prints an error message unless quiet is turned on
499     sub error() {
500     my($msg) = @_;
501     print STDERR $msg unless $quiet;
502 tdb 1.1 }