ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.7
Committed: Thu Mar 4 11:27:30 2004 UTC (20 years, 8 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.6: +19 -4 lines
Log Message:
Latest versions of the rrdgraphing code. Some graphs now use areas, others
use stacks. Added cache memory, and our mail queue graphs.

File Contents

# Content
1 #!/usr/bin/perl -w
2
3 #
4 # i-scream central monitoring system
5 # http://www.i-scream.org.uk
6 # Copyright (C) 2000-2002 i-scream
7 #
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License
10 # as published by the Free Software Foundation; either version 2
11 # of the License, or (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 #
22
23 # -----------------------------------------------------------
24 # i-scream graph generation scripts
25 # http://www.i-scream.org.uk
26 #
27 # Generates rrd databases for i-scream data by connecting to
28 # the i-scream server and collecting data.
29 #
30 # $Author: tdb $
31 # $Id: watch.pl,v 1.6 2003/03/10 11:34:43 tdb Exp $
32 #------------------------------------------------------------
33
34 BEGIN {
35 push (@INC, "/usr/local/packages/rrdtool/lib/perl5/site_perl/5.8.2/sun4-solaris");
36 }
37
38 my($version) = '$Id: watch.pl,v 1.6 2003/03/10 11:34:43 tdb Exp $';
39
40 $| = 1;
41
42 use strict;
43 use Getopt::Std;
44 use IO::Socket;
45 use iscream::XMLParser;
46 use RRDs;
47
48 # define variables that will be read from the config
49 # nb. keep this insync with the config file!
50 use vars qw{
51 $imgdir $rrddir
52 $maxrrdage $maximgage $deleterrds $deleteimgs
53 $hex_slash $hex_underscore
54 $rrdstep $retry_wait
55 $verbose $quiet
56 };
57
58 # default locate of the config file
59 my($configfile) = "rrdgraphing.conf";
60
61 # check for command line arguments
62 my(%opts);
63 my($ret) = getopts('hvqVc:', \%opts);
64
65 # if invalid argument given, $ret will not be 1
66 &usage() if $ret != 1;
67
68 # first process the arguments which might mean we exit now
69
70 # -h is usage
71 if($opts{h}) {
72 &usage();
73 }
74 # -V is version
75 if($opts{V}) {
76 print "watch.pl version: $version\n";
77 exit(1);
78 }
79
80 # Then try getting the config
81
82 # -c specifies the config file location
83 if($opts{c}) {
84 $configfile = $opts{c};
85 }
86 # suck in the config
87 &log("reading config from $configfile\n");
88 do $configfile;
89
90 # Then any options we might want to override the config with
91
92 # -v is verbose
93 if($opts{v}) {
94 $verbose = $opts{v};
95 }
96 # -q is verbose
97 if($opts{q}) {
98 $quiet = $opts{q};
99 # if we're meant to be quiet, we can hardly be verbose!
100 # $verbose = 0;
101 }
102
103 # Finally check for required arguments
104
105 # check we still have two arguments left
106 if (@ARGV != 2) {
107 &usage();
108 }
109
110 # user supplied client interface server and port
111 my($addr) = $ARGV[0];
112 my($cport) = $ARGV[1];
113
114
115 # Main program loop
116 while(1) {
117
118 &log("Connecting control channel to port $cport on $addr...\n");
119
120 # attempt to connect the control channel
121 my($csock) = new IO::Socket::INET(
122 PeerAddr => $addr,
123 PeerPort => $cport,
124 Proto => 'tcp'
125 );
126
127 # if socket isn't defined connection failed
128 if (!defined $csock) {
129 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
130 &error("Please check that there is an i-scream server at this address.\n");
131 &wait_then_retry();
132 next;
133 }
134
135 my($response);
136
137 # client interface should send it's protocol ID
138 # we know about "PROTOCOL 1.1", and will only accept the same
139 $response = <$csock>;
140 &log("CLI sent: $response");
141 if ($response && $response ne "PROTOCOL 1.1\n") {
142 &error("The i-scream server sent an unexpected protocol ID: $response\n");
143 close($csock);
144 &wait_then_retry();
145 next;
146 }
147
148 # send our identifier to the client interface
149 print $csock "rrdgraphing\n";
150 &log("we sent: rrdgraphing\n");
151 $response = <$csock>;
152 &log("CLI sent: $response");
153 if ($response && $response ne "OK\n") {
154 &error("Received unexpected response: $response\n");
155 close($csock);
156 &wait_then_retry();
157 next;
158 }
159
160 # tell the client interface we'd like to start the data channel
161 print $csock "STARTDATA\n";
162 &log("we sent: STARTDATA\n");
163
164 # the response should be the socket to connect the data channel to
165 $response = <$csock>;
166 &log("CLI sent: $response");
167 chomp $response;
168
169 my($dport) = $response;
170 &log("Connecting data channel to port $dport on $addr...\n");
171
172 # attempt to connect the data channel
173 my($dsock) = new IO::Socket::INET(
174 PeerAddr => $addr,
175 PeerPort => $dport,
176 Proto => 'tcp'
177 ) or die "arse?";
178
179 # if socket isn't defined connection failed
180 if (!defined $dsock) {
181 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
182 &error("Failure in communications.\n");
183 close($csock);
184 &wait_then_retry();
185 next;
186 }
187
188 # the data channel should now be sending us data!
189
190 # call sub to process data being received over the data channel
191 &processdata($dsock);
192
193 # data processing has stopped, close sockets
194 close($csock);
195 close($dsock);
196
197 # wait before retrying
198 &wait_then_retry();
199 }
200
201 # we'll never reach here... unless 1 becomes false for some reason ;)
202 exit 0;
203
204
205 #
206 # wait for a while before retrying
207 #
208 sub wait_then_retry() {
209 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
210 sleep $retry_wait;
211 }
212
213 #
214 # Given the socket of the data channel will process all
215 # the incoming XML data, creating and updating the appropriate
216 # database files.
217 #
218 # $dsock = socket connected to the data channel
219 #
220 sub processdata() {
221 # the socket connected to the data channel
222 my($dsock) = @_;
223 # save us recreating this variable each time we loop
224 my($xml);
225
226 while(1) {
227 # read data
228 $xml = <$dsock>;
229
230 # something odd has happened
231 last if not defined $xml;
232
233 # attempt to parse the data
234 my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
235 if($err) {
236 &error("Skipped, XML did not parse: $xml");
237 next;
238 }
239
240 # standard data packet
241 if($xmlhash{"packet.attributes.type"} eq "data") {
242 my($machine) = $xmlhash{"packet.attributes.machine_name"};
243 my($date) = $xmlhash{"packet.attributes.date"};
244
245 # make directory for machine
246 if(! -d "$rrddir/$machine") {
247 # not sure on this umask, but it seems to work?
248 mkdir "$rrddir/$machine", 0777;
249 &log("created directory $rrddir/$machine\n");
250 }
251
252 my(@data);
253
254 # cpu
255 @data = ( "packet.cpu.idle:idle:GAUGE",
256 "packet.cpu.user:user:GAUGE",
257 "packet.cpu.kernel:kernel:GAUGE",
258 "packet.cpu.swap:swap:GAUGE",
259 "packet.cpu.iowait:iowait:GAUGE",
260 );
261 &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
262
263 # mem
264 @data = ( "packet.memory.free:free:GAUGE",
265 "packet.memory.total:total:GAUGE",
266 "packet.memory.cache:cache:GAUGE",
267 );
268 &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
269
270 # load
271 @data = ( "packet.load.load1:load1:GAUGE",
272 "packet.load.load5:load5:GAUGE",
273 "packet.load.load15:load15:GAUGE",
274 );
275 &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
276
277 # processes
278 @data = ( "packet.processes.cpu:cpu:GAUGE",
279 "packet.processes.sleeping:sleeping:GAUGE",
280 "packet.processes.stopped:stopped:GAUGE",
281 "packet.processes.total:total:GAUGE",
282 "packet.processes.zombie:zombie:GAUGE",
283 );
284 &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
285
286 # swap
287 @data = ( "packet.swap.free:free:GAUGE",
288 "packet.swap.total:total:GAUGE",
289 );
290 &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
291
292 # users
293 @data = ( "packet.users.count:count:GAUGE",
294 );
295 &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
296
297 # paging
298 @data = ( "packet.pages.pageins:pageins:GAUGE",
299 "packet.pages.pageouts:pageouts:GAUGE",
300 );
301 &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
302
303 # disk
304 my($i) = 0;
305 while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
306 my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
307 $mount =~ s/_/$hex_underscore/g;
308 $mount =~ s/\//$hex_slash/g;
309 @data = ( "packet.disk.p$i.attributes.total:total:GAUGE",
310 "packet.disk.p$i.attributes.used:used:GAUGE",
311 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
312 "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
313 );
314 &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
315 ++$i;
316 }
317
318 # diskio
319 $i = 0;
320 while(defined $xmlhash{"packet.diskio.p$i.attributes.name"}) {
321 my($name) = $xmlhash{"packet.diskio.p$i.attributes.name"};
322 @data = ( "packet.diskio.p$i.attributes.rbytes:rbytes:GAUGE",
323 "packet.diskio.p$i.attributes.wbytes:wbytes:GAUGE",
324 );
325 &updaterrd($machine, "diskio-$name", $date, $rrdstep, \%xmlhash, @data);
326 ++$i
327 }
328
329 # net
330 $i = 0;
331 while(defined $xmlhash{"packet.net.p$i.attributes.name"}) {
332 my($name) = $xmlhash{"packet.net.p$i.attributes.name"};
333 @data = ( "packet.net.p$i.attributes.rx:rx:GAUGE",
334 "packet.net.p$i.attributes.tx:tx:GAUGE",
335 );
336 &updaterrd($machine, "net-$name", $date, $rrdstep, \%xmlhash, @data);
337 ++$i
338 }
339
340 # mailq
341 $i = 0;
342 while(defined $xmlhash{"packet.mailq.p$i.attributes.name"}) {
343 my($name) = $xmlhash{"packet.mailq.p$i.attributes.name"};
344 $name =~ s/\s+//g;
345 @data = ( "packet.mailq.p$i.attributes.size:size:GAUGE",
346 );
347 &updaterrd($machine, "mailq-$name", $date, $rrdstep, \%xmlhash, @data);
348 ++$i
349 }
350 }
351
352 # queue statistics packet
353 elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
354 # psuedo machine for internal server stuff
355 my($machine) = "i-scream-server";
356 # make directory
357 if(! -d "$rrddir/$machine") {
358 # not sure on this umask, but it seems to work?
359 mkdir "$rrddir/$machine", 0777;
360 &log("created directory $rrddir/$machine\n");
361 }
362 my($hash) = $xmlhash{"packet.attributes.hashCode"};
363 my($date) = $xmlhash{"packet.attributes.date"};
364 my($name) = $xmlhash{"packet.attributes.name"};
365 # take a look to see if we have a shutdown packet...
366 if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
367 unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
368 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
369 next;
370 }
371 # look through to see how many internal queues we have
372 my($i) = 0;
373 while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
374 # see if the queue has been removed
375 if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
376 # delete the queues rrd
377 unlink "$rrddir/$machine/$hash\_$i.rrd";
378 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
379 # are there any other rrd's left on this queue? if not, cleanup.
380 # get a list of any that may be still there..
381 opendir(DIR, "$rrddir/$machine");
382 my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
383 closedir DIR;
384 # count them (+1 because an empty array is size -1)
385 my($rrdcount) = $#rrdcountfiles + 1;
386 if($rrdcount == 0) {
387 # clean up the def file and any images
388 unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
389 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
390 }
391 ++$i;
392 next;
393 }
394 # the &updaterrd will also do this check, but we want
395 # to write our def file out first
396 if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
397 open(DEF, ">$rrddir/$machine/$hash.def");
398 print DEF $name;
399 close DEF;
400 &log("created $rrddir/$machine/$hash.def\n");
401 }
402 my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
403 "packet.queue.attributes.total:total:COUNTER",
404 );
405 &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
406 ++$i;
407 }
408 }
409 else {
410 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
411 }
412 }
413
414 # we'll now return from this sub and reconnect
415 &error("Data channel socket gave no data, bailing out...\n");
416 }
417
418 #
419 # sub to update an rrd file
420 #
421 # $machine = name of the machine
422 # (eg. kernow.ukc.ac.uk)
423 # $type = the type of graph for the machine
424 # (eg. cpu)
425 # $date = the date of the item we want to add
426 # (in seconds since the epoch)
427 # $step = the interval at which the database steps
428 # used when we create a new rrd
429 # $xmlref = reference to the xml data packet
430 # @data = array containing data items to add
431 # (eg. "packet.cpu.user:user:GAUGE")
432 #
433 sub updaterrd() {
434 my($machine, $type, $date, $step, $xmlref, @data) = @_;
435 # get hold of the xmlhash we have a reference to
436 my(%xmlhash) = %$xmlref;
437 # check if we need to create a new rrd
438 if( ! -f "$rrddir/$machine/$type.rrd") {
439 my(@createdata);
440 # pull the details out of the data we've been given
441 foreach my $dataitem (@data) {
442 if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
443 push @createdata, "$1:$2";
444 }
445 }
446 # call the &makerrd to actually create the rrd
447 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
448 &makerrd($machine, $type, $date, $step, @createdata);
449 }
450 # get the details out of the data we've been given
451 my($updateparams) = "$date";
452 foreach my $dataitem (@data) {
453 if($dataitem =~ /^(\S+):\S+:\S+$/) {
454 # pull the values straight out of the xmlhash
455 my($value) = $xmlhash{$1};
456 # if it's undefined we'll set it to 0
457 # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
458 $value = "0" if not defined $value;
459 $updateparams .= ":$value";
460 }
461 }
462 # perform the update
463 RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
464 &log("updating $rrddir/$machine/$type.rrd\n");
465 my($err) = RRDs::error;
466 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
467 }
468
469 #
470 # sub to create a new rrd file
471 #
472 # $machine = name of the machine
473 # (eg. kernow.ukc.ac.uk)
474 # $type = the type of graph for the machine
475 # (eg. cpu)
476 # $start = the date of the first item we want to add
477 # (in seconds since the epoch)
478 # $step = the interval at which the database steps
479 # @data = the data items we want to put in the rrd
480 # in the form: $dsname:dstype
481 # (eg. "size:GAUGE")
482 #
483 sub makerrd() {
484 my($machine, $type, $start, $step, @data) = @_;
485 # check if directory exists for rrd
486 if(! -d "$rrddir/$machine") {
487 # not sure on this umask, but it seems to work?
488 mkdir "$rrddir/$machine", 0777;
489 &log("created directory $rrddir/$machine\n");
490 }
491 my(@rrdcmd);
492 # we'll want to add our first data item at $start,
493 # so we start our rrd $step before that.
494 $start -= $step;
495 push @rrdcmd, "$rrddir/$machine/$type.rrd";
496 push @rrdcmd, "--start=$start";
497 push @rrdcmd, "--step=$step";
498 foreach my $dataitem (@data) {
499 # dataitem should be: "dsname:dstype"
500 if($dataitem =~ /^(\S+):(\S+)$/) {
501 push @rrdcmd, "DS:$1:$2:600:U:U";
502 }
503 }
504 push @rrdcmd, (
505 # 3h in 15s samples
506 "RRA:AVERAGE:0.5:1:720",
507 "RRA:MAX:0.5:1:720",
508 # 1d in 2m samples
509 "RRA:AVERAGE:0.5:8:720",
510 "RRA:MAX:0.5:8:720",
511 # 1w in 15m samples
512 "RRA:AVERAGE:0.5:60:672",
513 "RRA:MAX:0.5:60:672",
514 # 1m in 1hr samples
515 "RRA:AVERAGE:0.5:240:744",
516 "RRA:MAX:0.5:240:744",
517 # 1y in 12hr samples
518 "RRA:AVERAGE:0.5:2880:730",
519 "RRA:MAX:0.5:2880:730",
520 );
521 RRDs::create (@rrdcmd);
522 my($err) = RRDs::error;
523 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
524 }
525
526 # prints out usage information then exits
527 sub usage() {
528 print "Usage: watch.pl [options] i-scream_client_interface port\n";
529 print "Options\n";
530 print " -c config Specifies the configuration file\n";
531 print " default: rrdgraphing.conf\n";
532 print " -v Be verbose about what's happening\n";
533 print " -q Be quiet, even supress errors\n";
534 print " -V Print version number\n";
535 print " -h Prints this help page\n";
536 exit(1);
537 }
538
539 # prints a log message if verbose is turned on
540 sub log() {
541 my($msg) = @_;
542 print $msg if $verbose;
543 }
544
545 # prints an error message unless quiet is turned on
546 sub error() {
547 my($msg) = @_;
548 print STDERR $msg unless $quiet;
549 }