ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.5
Committed: Mon Oct 21 13:02:58 2002 UTC (22 years, 1 month ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.4: +11 -3 lines
Log Message:
Add support for disk inode usage, and paging activity. Have added to both
the latest data page, and to the graphs sections. Also reworked the memory,
swap, and disk graphing to be percentage based.

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 #
4     # i-scream central monitoring system
5 tdb 1.4 # http://www.i-scream.org.uk
6 tdb 1.2 # Copyright (C) 2000-2002 i-scream
7     #
8     # This program is free software; you can redistribute it and/or
9     # modify it under the terms of the GNU General Public License
10     # as published by the Free Software Foundation; either version 2
11     # of the License, or (at your option) any later version.
12     #
13     # This program is distributed in the hope that it will be useful,
14     # but WITHOUT ANY WARRANTY; without even the implied warranty of
15     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     # GNU General Public License for more details.
17     #
18     # You should have received a copy of the GNU General Public License
19     # along with this program; if not, write to the Free Software
20     # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21     #
22    
23 tdb 1.1 # -----------------------------------------------------------
24     # i-scream graph generation scripts
25     # http://www.i-scream.org.uk
26     #
27     # Generates rrd databases for i-scream data by connecting to
28     # the i-scream server and collecting data.
29     #
30     # $Author: tdb $
31 tdb 1.5 # $Id: watch.pl,v 1.4 2002/05/21 16:47:16 tdb Exp $
32 tdb 1.1 #------------------------------------------------------------
33    
34 tdb 1.5 my($version) = '$Id: watch.pl,v 1.4 2002/05/21 16:47:16 tdb Exp $';
35 tdb 1.1
36     $| = 1;
37    
38     use strict;
39 tdb 1.3 use Getopt::Std;
40     use IO::Socket;
41 tdb 1.1 use iscream::XMLParser;
42     use RRDs;
43    
44 tdb 1.3 # define variables that will be read from the config
45     # nb. keep this insync with the config file!
46     use vars qw{
47     $imgdir $rrddir
48     $maxrrdage $maximgage $deleterrds $deleteimgs
49     $hex_slash $hex_underscore
50     $rrdstep $retry_wait
51     $verbose $quiet
52     };
53    
54     # default locate of the config file
55     my($configfile) = "rrdgraphing.conf";
56    
57     # check for command line arguments
58     my(%opts);
59     my($ret) = getopts('hvqVc:', \%opts);
60    
61     # if invalid argument given, $ret will not be 1
62     &usage() if $ret != 1;
63    
64     # first process the arguments which might mean we exit now
65    
66     # -h is usage
67     if($opts{h}) {
68     &usage();
69     }
70     # -V is version
71     if($opts{V}) {
72     print "watch.pl version: $version\n";
73     exit(1);
74     }
75 tdb 1.1
76 tdb 1.3 # Then try getting the config
77 tdb 1.1
78 tdb 1.3 # -c specifies the config file location
79     if($opts{c}) {
80     $configfile = $opts{c};
81     }
82     # suck in the config
83     &log("reading config from $configfile\n");
84     do $configfile;
85    
86     # Then any options we might want to override the config with
87    
88     # -v is verbose
89     if($opts{v}) {
90     $verbose = $opts{v};
91     }
92     # -q is verbose
93     if($opts{q}) {
94     $quiet = $opts{q};
95     # if we're meant to be quiet, we can hardly be verbose!
96     # $verbose = 0;
97     }
98 tdb 1.1
99 tdb 1.3 # Finally check for required arguments
100    
101     # check we still have two arguments left
102 tdb 1.1 if (@ARGV != 2) {
103 tdb 1.3 &usage();
104 tdb 1.1 }
105    
106     # user supplied client interface server and port
107     my($addr) = $ARGV[0];
108     my($cport) = $ARGV[1];
109    
110 tdb 1.3
111     # Main program loop
112 tdb 1.1 while(1) {
113    
114 tdb 1.3 &log("Connecting control channel to port $cport on $addr...\n");
115 tdb 1.1
116     # attempt to connect the control channel
117     my($csock) = new IO::Socket::INET(
118     PeerAddr => $addr,
119     PeerPort => $cport,
120     Proto => 'tcp'
121     );
122    
123     # if socket isn't defined connection failed
124     if (!defined $csock) {
125 tdb 1.3 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
126     &error("Please check that there is an i-scream server at this address.\n");
127 tdb 1.1 &wait_then_retry();
128     next;
129     }
130    
131     my($response);
132    
133     # client interface should send it's protocol ID
134     # we know about "PROTOCOL 1.1", and will only accept the same
135     $response = <$csock>;
136 tdb 1.3 &log("CLI sent: $response");
137 tdb 1.1 if ($response && $response ne "PROTOCOL 1.1\n") {
138 tdb 1.3 &error("The i-scream server sent an unexpected protocol ID: $response\n");
139 tdb 1.1 close($csock);
140     &wait_then_retry();
141     next;
142     }
143    
144     # send our identifier to the client interface
145     print $csock "rrdgraphing\n";
146 tdb 1.3 &log("we sent: rrdgraphing\n");
147 tdb 1.1 $response = <$csock>;
148 tdb 1.3 &log("CLI sent: $response");
149 tdb 1.1 if ($response && $response ne "OK\n") {
150 tdb 1.3 &error("Received unexpected response: $response\n");
151 tdb 1.1 close($csock);
152     &wait_then_retry();
153     next;
154     }
155    
156     # tell the client interface we'd like to start the data channel
157     print $csock "STARTDATA\n";
158 tdb 1.3 &log("we sent: STARTDATA\n");
159    
160 tdb 1.1 # the response should be the socket to connect the data channel to
161     $response = <$csock>;
162 tdb 1.3 &log("CLI sent: $response");
163 tdb 1.1 chomp $response;
164    
165     my($dport) = $response;
166 tdb 1.3 &log("Connecting data channel to port $dport on $addr...\n");
167 tdb 1.1
168     # attempt to connect the data channel
169     my($dsock) = new IO::Socket::INET(
170     PeerAddr => $addr,
171     PeerPort => $dport,
172     Proto => 'tcp'
173     ) or die "arse?";
174    
175     # if socket isn't defined connection failed
176     if (!defined $dsock) {
177 tdb 1.3 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
178     &error("Failure in communications.\n");
179 tdb 1.1 close($csock);
180     &wait_then_retry();
181     next;
182     }
183    
184     # the data channel should now be sending us data!
185    
186     # call sub to process data being received over the data channel
187     &processdata($dsock);
188    
189     # data processing has stopped, close sockets
190     close($csock);
191     close($dsock);
192    
193     # wait before retrying
194     &wait_then_retry();
195     }
196    
197     # we'll never reach here... unless 1 becomes false for some reason ;)
198     exit 0;
199    
200    
201     #
202     # wait for a while before retrying
203     #
204     sub wait_then_retry() {
205 tdb 1.3 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
206 tdb 1.1 sleep $retry_wait;
207     }
208    
209     #
210     # Given the socket of the data channel will process all
211     # the incoming XML data, creating and updating the appropriate
212     # database files.
213     #
214     # $dsock = socket connected to the data channel
215     #
216     sub processdata() {
217     # the socket connected to the data channel
218     my($dsock) = @_;
219     # save us recreating this variable each time we loop
220     my($xml);
221    
222     while(1) {
223     # read data
224     $xml = <$dsock>;
225    
226     # something odd has happened
227     last if not defined $xml;
228    
229     # attempt to parse the data
230     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
231     if($err) {
232 tdb 1.3 &error("Skipped, XML did not parse: $xml");
233 tdb 1.1 next;
234     }
235    
236     # standard data packet
237     if($xmlhash{"packet.attributes.type"} eq "data") {
238     my($machine) = $xmlhash{"packet.attributes.machine_name"};
239     my($date) = $xmlhash{"packet.attributes.date"};
240    
241     # make directory for machine
242     if(! -d "$rrddir/$machine") {
243     # not sure on this umask, but it seems to work?
244     mkdir "$rrddir/$machine", 0777;
245 tdb 1.3 &log("created directory $rrddir/$machine\n");
246 tdb 1.1 }
247    
248     my(@data);
249    
250     # cpu
251     @data = ( "packet.cpu.idle:idle:GAUGE",
252     "packet.cpu.user:user:GAUGE",
253     "packet.cpu.kernel:kernel:GAUGE",
254     "packet.cpu.swap:swap:GAUGE",
255     "packet.cpu.iowait:iowait:GAUGE",
256     );
257     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
258    
259     # mem
260     @data = ( "packet.memory.free:free:GAUGE",
261     "packet.memory.total:total:GAUGE",
262     );
263     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
264    
265     # load
266     @data = ( "packet.load.load1:load1:GAUGE",
267     "packet.load.load5:load5:GAUGE",
268     "packet.load.load15:load15:GAUGE",
269     );
270     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
271    
272     # processes
273     @data = ( "packet.processes.cpu:cpu:GAUGE",
274     "packet.processes.sleeping:sleeping:GAUGE",
275     "packet.processes.stopped:stopped:GAUGE",
276     "packet.processes.total:total:GAUGE",
277     "packet.processes.zombie:zombie:GAUGE",
278     );
279     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
280    
281     # swap
282     @data = ( "packet.swap.free:free:GAUGE",
283     "packet.swap.total:total:GAUGE",
284     );
285     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
286    
287     # users
288     @data = ( "packet.users.count:count:GAUGE",
289     );
290     &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
291 tdb 1.5
292     # paging
293     @data = ( "packet.pages.swapins:swapins:GAUGE",
294     "packet.pages.swapouts:swapouts:GAUGE",
295     );
296     &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
297    
298 tdb 1.1 # disk
299     my($i) = 0;
300     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
301     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
302     $mount =~ s/_/$hex_underscore/g;
303     $mount =~ s/\//$hex_slash/g;
304     @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
305     "packet.disk.p$i.attributes.used:used:GAUGE",
306 tdb 1.5 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
307     "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
308 tdb 1.1 );
309     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
310     ++$i;
311     }
312     }
313    
314     # queue statistics packet
315     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
316     # psuedo machine for internal server stuff
317     my($machine) = "i-scream-server";
318     # make directory
319     if(! -d "$rrddir/$machine") {
320     # not sure on this umask, but it seems to work?
321     mkdir "$rrddir/$machine", 0777;
322 tdb 1.3 &log("created directory $rrddir/$machine\n");
323 tdb 1.1 }
324     my($hash) = $xmlhash{"packet.attributes.hashCode"};
325     my($date) = $xmlhash{"packet.attributes.date"};
326     my($name) = $xmlhash{"packet.attributes.name"};
327     # take a look to see if we have a shutdown packet...
328     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
329     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
330 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
331 tdb 1.1 next;
332     }
333     # look through to see how many internal queues we have
334     my($i) = 0;
335     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
336     # see if the queue has been removed
337     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
338     # delete the queues rrd
339     unlink "$rrddir/$machine/$hash\_$i.rrd";
340 tdb 1.3 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
341 tdb 1.1 # are there any other rrd's left on this queue? if not, cleanup.
342     # get a list of any that may be still there..
343     opendir(DIR, "$rrddir/$machine");
344     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
345     closedir DIR;
346     # count them (+1 because an empty array is size -1)
347     my($rrdcount) = $#rrdcountfiles + 1;
348     if($rrdcount == 0) {
349     # clean up the def file and any images
350     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
351 tdb 1.3 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
352 tdb 1.1 }
353     ++$i;
354     next;
355     }
356     # the &updaterrd will also do this check, but we want
357     # to write our def file out first
358     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
359     open(DEF, ">$rrddir/$machine/$hash.def");
360     print DEF $name;
361     close DEF;
362 tdb 1.3 &log("created $rrddir/$machine/$hash.def\n");
363 tdb 1.1 }
364     my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
365     "packet.queue.attributes.total:total:COUNTER",
366     );
367     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
368     ++$i;
369     }
370     }
371     else {
372 tdb 1.3 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
373 tdb 1.1 }
374     }
375    
376     # we'll now return from this sub and reconnect
377 tdb 1.3 &error("Data channel socket gave no data, bailing out...\n");
378 tdb 1.1 }
379    
380     #
381     # sub to update an rrd file
382     #
383     # $machine = name of the machine
384     # (eg. kernow.ukc.ac.uk)
385     # $type = the type of graph for the machine
386     # (eg. cpu)
387     # $date = the date of the item we want to add
388     # (in seconds since the epoch)
389     # $step = the interval at which the database steps
390     # used when we create a new rrd
391     # $xmlref = reference to the xml data packet
392     # @data = array containing data items to add
393     # (eg. "packet.cpu.user:user:GAUGE")
394     #
395     sub updaterrd() {
396     my($machine, $type, $date, $step, $xmlref, @data) = @_;
397     # get hold of the xmlhash we have a reference to
398     my(%xmlhash) = %$xmlref;
399     # check if we need to create a new rrd
400     if( ! -f "$rrddir/$machine/$type.rrd") {
401     my(@createdata);
402     # pull the details out of the data we've been given
403     foreach my $dataitem (@data) {
404     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
405     push @createdata, "$1:$2";
406     }
407     }
408     # call the &makerrd to actually create the rrd
409 tdb 1.3 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
410 tdb 1.1 &makerrd($machine, $type, $date, $step, @createdata);
411     }
412     # get the details out of the data we've been given
413     my($updateparams) = "$date";
414     foreach my $dataitem (@data) {
415     if($dataitem =~ /^(\S+):\S+:\S+$/) {
416     # pull the values straight out of the xmlhash
417     my($value) = $xmlhash{$1};
418     # if it's undefined we'll set it to 0
419     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
420     $value = "0" if not defined $value;
421     $updateparams .= ":$value";
422     }
423     }
424     # perform the update
425     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
426 tdb 1.3 &log("updating $rrddir/$machine/$type.rrd\n");
427 tdb 1.1 my($err) = RRDs::error;
428 tdb 1.3 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
429 tdb 1.1 }
430    
431     #
432     # sub to create a new rrd file
433     #
434     # $machine = name of the machine
435     # (eg. kernow.ukc.ac.uk)
436     # $type = the type of graph for the machine
437     # (eg. cpu)
438     # $start = the date of the first item we want to add
439     # (in seconds since the epoch)
440     # $step = the interval at which the database steps
441     # @data = the data items we want to put in the rrd
442     # in the form: $dsname:dstype
443     # (eg. "size:GAUGE")
444     #
445     sub makerrd() {
446     my($machine, $type, $start, $step, @data) = @_;
447     # check if directory exists for rrd
448     if(! -d "$rrddir/$machine") {
449     # not sure on this umask, but it seems to work?
450     mkdir "$rrddir/$machine", 0777;
451 tdb 1.3 &log("created directory $rrddir/$machine\n");
452 tdb 1.1 }
453     my(@rrdcmd);
454     # we'll want to add our first data item at $start,
455     # so we start our rrd $step before that.
456     $start -= $step;
457     push @rrdcmd, "$rrddir/$machine/$type.rrd";
458     push @rrdcmd, "--start=$start";
459     push @rrdcmd, "--step=$step";
460     foreach my $dataitem (@data) {
461     # dataitem should be: "dsname:dstype"
462     if($dataitem =~ /^(\S+):(\S+)$/) {
463     push @rrdcmd, "DS:$1:$2:600:U:U";
464     }
465     }
466     push @rrdcmd, (
467     # 3h in 15s samples
468     "RRA:AVERAGE:0.5:1:720",
469     "RRA:MAX:0.5:1:720",
470     # 1d in 2m samples
471     "RRA:AVERAGE:0.5:8:720",
472     "RRA:MAX:0.5:8:720",
473     # 1w in 15m samples
474     "RRA:AVERAGE:0.5:60:672",
475     "RRA:MAX:0.5:60:672",
476     # 1m in 1hr samples
477     "RRA:AVERAGE:0.5:240:744",
478     "RRA:MAX:0.5:240:744",
479     # 1y in 12hr samples
480     "RRA:AVERAGE:0.5:2880:730",
481     "RRA:MAX:0.5:2880:730",
482     );
483     RRDs::create (@rrdcmd);
484     my($err) = RRDs::error;
485 tdb 1.3 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
486     }
487    
488     # prints out usage information then exits
489     sub usage() {
490     print "Usage: watch.pl [options] i-scream_client_interface port\n";
491     print "Options\n";
492     print " -c config Specifies the configuration file\n";
493     print " default: rrdgraphing.conf\n";
494     print " -v Be verbose about what's happening\n";
495     print " -q Be quiet, even supress errors\n";
496     print " -V Print version number\n";
497     print " -h Prints this help page\n";
498     exit(1);
499     }
500    
501     # prints a log message if verbose is turned on
502     sub log() {
503     my($msg) = @_;
504     print $msg if $verbose;
505     }
506    
507     # prints an error message unless quiet is turned on
508     sub error() {
509     my($msg) = @_;
510     print STDERR $msg unless $quiet;
511 tdb 1.1 }