ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.13
Committed: Thu Nov 23 16:37:44 2006 UTC (17 years, 5 months ago) by tdb
Content type: text/plain
Branch: MAIN
CVS Tags: HEAD
Changes since 1.12: +3 -3 lines
Log Message:
Make graph sizes configurable. Needed for rrdtool 1.2.x when text is
anti-aliased and no longer fits.

File Contents

# Content
1 #!/usr/bin/perl -w
2
3 #
4 # i-scream central monitoring system
5 # http://www.i-scream.org
6 # Copyright (C) 2000-2002 i-scream
7 #
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License
10 # as published by the Free Software Foundation; either version 2
11 # of the License, or (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 #
22
23 # -----------------------------------------------------------
24 # i-scream graph generation scripts
25 # http://www.i-scream.org
26 #
27 # Generates rrd databases for i-scream data by connecting to
28 # the i-scream server and collecting data.
29 #
30 # $Author: tdb $
31 # $Id: watch.pl,v 1.12 2005/06/29 10:26:40 tdb Exp $
32 #------------------------------------------------------------
33
34 my($version) = '$Id: watch.pl,v 1.12 2005/06/29 10:26:40 tdb Exp $';
35
36 $| = 1;
37
38 use strict;
39 use Getopt::Std;
40 use IO::Socket;
41 use iscream::XMLParser;
42 use RRDs;
43
44 # define variables that will be read from the config
45 # nb. keep this insync with the config file!
46 use vars qw{
47 $imgdir $imgwidth $imgheight $rrddir
48 $maxrrdage $maximgage $deleterrds $deleteimgs
49 $hex_slash $hex_underscore $hex_space $hex_colon $hex_bslash $hex_rbracket
50 $hex_lbracket $hex_plus $hex_hash
51 $rrdstep $retry_wait
52 $verbose $quiet
53 };
54
55 # default locate of the config file
56 my($configfile) = "rrdgraphing.conf";
57
58 # check for command line arguments
59 my(%opts);
60 my($ret) = getopts('hvqVc:', \%opts);
61
62 # if invalid argument given, $ret will not be 1
63 &usage() if $ret != 1;
64
65 # first process the arguments which might mean we exit now
66
67 # -h is usage
68 if($opts{h}) {
69 &usage();
70 }
71 # -V is version
72 if($opts{V}) {
73 print "watch.pl version: $version\n";
74 exit(1);
75 }
76
77 # Then try getting the config
78
79 # -c specifies the config file location
80 if($opts{c}) {
81 $configfile = $opts{c};
82 }
83 # suck in the config
84 &log("reading config from $configfile\n");
85 do $configfile;
86
87 # Then any options we might want to override the config with
88
89 # -v is verbose
90 if($opts{v}) {
91 $verbose = $opts{v};
92 }
93 # -q is verbose
94 if($opts{q}) {
95 $quiet = $opts{q};
96 # if we're meant to be quiet, we can hardly be verbose!
97 # $verbose = 0;
98 }
99
100 # Finally check for required arguments
101
102 # check we still have two arguments left
103 if (@ARGV != 2) {
104 &usage();
105 }
106
107 # user supplied client interface server and port
108 my($addr) = $ARGV[0];
109 my($cport) = $ARGV[1];
110
111
112 # Main program loop
113 while(1) {
114
115 &log("Connecting control channel to port $cport on $addr...\n");
116
117 # attempt to connect the control channel
118 my($csock) = new IO::Socket::INET(
119 PeerAddr => $addr,
120 PeerPort => $cport,
121 Proto => 'tcp'
122 );
123
124 # if socket isn't defined connection failed
125 if (!defined $csock) {
126 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
127 &error("Please check that there is an i-scream server at this address.\n");
128 &wait_then_retry();
129 next;
130 }
131
132 my($response);
133
134 # client interface should send it's protocol ID
135 # we know about "PROTOCOL 1.1", and will only accept the same
136 $response = <$csock>;
137 &log("CLI sent: $response");
138 if ($response && $response ne "PROTOCOL 1.1\n") {
139 &error("The i-scream server sent an unexpected protocol ID: $response\n");
140 close($csock);
141 &wait_then_retry();
142 next;
143 }
144
145 # send our identifier to the client interface
146 print $csock "rrdgraphing\n";
147 &log("we sent: rrdgraphing\n");
148 $response = <$csock>;
149 &log("CLI sent: $response");
150 if ($response && $response ne "OK\n") {
151 &error("Received unexpected response: $response\n");
152 close($csock);
153 &wait_then_retry();
154 next;
155 }
156
157 # tell the client interface we'd like to start the data channel
158 print $csock "STARTDATA\n";
159 &log("we sent: STARTDATA\n");
160
161 # the response should be the socket to connect the data channel to
162 $response = <$csock>;
163 &log("CLI sent: $response");
164 chomp $response;
165
166 my($dport) = $response;
167 &log("Connecting data channel to port $dport on $addr...\n");
168
169 # attempt to connect the data channel
170 my($dsock) = new IO::Socket::INET(
171 PeerAddr => $addr,
172 PeerPort => $dport,
173 Proto => 'tcp'
174 ) or die "arse?";
175
176 # if socket isn't defined connection failed
177 if (!defined $dsock) {
178 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
179 &error("Failure in communications.\n");
180 close($csock);
181 &wait_then_retry();
182 next;
183 }
184
185 # the data channel should now be sending us data!
186
187 # call sub to process data being received over the data channel
188 &processdata($dsock);
189
190 # data processing has stopped, close sockets
191 close($csock);
192 close($dsock);
193
194 # wait before retrying
195 &wait_then_retry();
196 }
197
198 # we'll never reach here... unless 1 becomes false for some reason ;)
199 exit 0;
200
201
202 #
203 # wait for a while before retrying
204 #
205 sub wait_then_retry() {
206 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
207 sleep $retry_wait;
208 }
209
210 #
211 # Given the socket of the data channel will process all
212 # the incoming XML data, creating and updating the appropriate
213 # database files.
214 #
215 # $dsock = socket connected to the data channel
216 #
217 sub processdata() {
218 # the socket connected to the data channel
219 my($dsock) = @_;
220 # save us recreating this variable each time we loop
221 my($xml);
222
223 while(1) {
224 # read data
225 $xml = <$dsock>;
226
227 # something odd has happened
228 last if not defined $xml;
229
230 # attempt to parse the data
231 my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
232 if($err) {
233 &error("Skipped, XML did not parse: $xml");
234 next;
235 }
236
237 # standard data packet
238 if($xmlhash{"packet.attributes.type"} eq "data") {
239 my($machine) = $xmlhash{"packet.attributes.machine_name"};
240 my($date) = $xmlhash{"packet.attributes.date"};
241
242 # make directory for machine
243 if(! -d "$rrddir/$machine") {
244 # not sure on this umask, but it seems to work?
245 mkdir "$rrddir/$machine", 0777;
246 &log("created directory $rrddir/$machine\n");
247 }
248
249 my(@data);
250
251 # cpu
252 @data = ( "packet.cpu.idle:idle:GAUGE",
253 "packet.cpu.user:user:GAUGE",
254 "packet.cpu.kernel:kernel:GAUGE",
255 "packet.cpu.swap:swap:GAUGE",
256 "packet.cpu.iowait:iowait:GAUGE",
257 );
258 &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
259
260 # uptime
261 @data = ( "packet.os.uptime:uptime:GAUGE" );
262 &updaterrd($machine, "uptime", $date, $rrdstep, \%xmlhash, @data);
263
264 # mem
265 @data = ( "packet.memory.free:free:GAUGE",
266 "packet.memory.total:total:GAUGE",
267 "packet.memory.cache:cache:GAUGE",
268 );
269 &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
270
271 # load
272 @data = ( "packet.load.load1:load1:GAUGE",
273 "packet.load.load5:load5:GAUGE",
274 "packet.load.load15:load15:GAUGE",
275 );
276 &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
277
278 # processes
279 @data = ( "packet.processes.cpu:cpu:GAUGE",
280 "packet.processes.sleeping:sleeping:GAUGE",
281 "packet.processes.stopped:stopped:GAUGE",
282 "packet.processes.total:total:GAUGE",
283 "packet.processes.zombie:zombie:GAUGE",
284 );
285 &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
286
287 # swap
288 @data = ( "packet.swap.free:free:GAUGE",
289 "packet.swap.total:total:GAUGE",
290 );
291 &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
292
293 # users
294 @data = ( "packet.users.count:count:GAUGE",
295 );
296 &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
297
298 # paging
299 @data = ( "packet.pages.pageins:pageins:GAUGE",
300 "packet.pages.pageouts:pageouts:GAUGE",
301 );
302 &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
303
304 # disk
305 my($i) = 0;
306 while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
307 my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
308 $mount =~ s/_/$hex_underscore/g;
309 $mount =~ s/\//$hex_slash/g;
310 $mount =~ s/\\/$hex_bslash/g;
311 $mount =~ s/ /$hex_space/g;
312 $mount =~ s/:/$hex_colon/g;
313 @data = ( "packet.disk.p$i.attributes.total:total:GAUGE",
314 "packet.disk.p$i.attributes.used:used:GAUGE",
315 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
316 "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
317 );
318 &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
319 ++$i;
320 }
321
322 # diskio
323 $i = 0;
324 while(defined $xmlhash{"packet.diskio.p$i.attributes.name"}) {
325 my($name) = $xmlhash{"packet.diskio.p$i.attributes.name"};
326 $name =~ s/_/$hex_underscore/g;
327 $name =~ s/\//$hex_slash/g;
328 $name =~ s/\\/$hex_bslash/g;
329 $name =~ s/ /$hex_space/g;
330 $name =~ s/:/$hex_colon/g;
331 @data = ( "packet.diskio.p$i.attributes.rbytes:rbytes:GAUGE",
332 "packet.diskio.p$i.attributes.wbytes:wbytes:GAUGE",
333 );
334 &updaterrd($machine, "diskio-$name", $date, $rrdstep, \%xmlhash, @data);
335 ++$i
336 }
337
338 # net
339 $i = 0;
340 while(defined $xmlhash{"packet.net.p$i.attributes.name"}) {
341 my($name) = $xmlhash{"packet.net.p$i.attributes.name"};
342 $name =~ s/_/$hex_underscore/g;
343 $name =~ s/\//$hex_slash/g;
344 $name =~ s/\\/$hex_bslash/g;
345 $name =~ s/ /$hex_space/g;
346 $name =~ s/:/$hex_colon/g;
347 $name =~ s/\(/$hex_lbracket/g;
348 $name =~ s/\)/$hex_rbracket/g;
349 $name =~ s/\+/$hex_plus/g;
350 $name =~ s/#/$hex_hash/g;
351 @data = ( "packet.net.p$i.attributes.rx:rx:GAUGE",
352 "packet.net.p$i.attributes.tx:tx:GAUGE",
353 );
354 &updaterrd($machine, "net-$name", $date, $rrdstep, \%xmlhash, @data);
355 ++$i
356 }
357
358 # mailq
359 $i = 0;
360 while(defined $xmlhash{"packet.mailq.p$i.attributes.name"}) {
361 my($name) = $xmlhash{"packet.mailq.p$i.attributes.name"};
362 $name =~ s/\s+//g;
363 @data = ( "packet.mailq.p$i.attributes.size:size:GAUGE",
364 );
365 &updaterrd($machine, "mailq-$name", $date, $rrdstep, \%xmlhash, @data);
366 ++$i
367 }
368 }
369
370 # queue statistics packet
371 elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
372 # psuedo machine for internal server stuff
373 my($machine) = "i-scream-server";
374 # make directory
375 if(! -d "$rrddir/$machine") {
376 # not sure on this umask, but it seems to work?
377 mkdir "$rrddir/$machine", 0777;
378 &log("created directory $rrddir/$machine\n");
379 }
380 my($hash) = $xmlhash{"packet.attributes.hashCode"};
381 my($date) = $xmlhash{"packet.attributes.date"};
382 my($name) = $xmlhash{"packet.attributes.name"};
383 # take a look to see if we have a shutdown packet...
384 if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
385 unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
386 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
387 next;
388 }
389 # look through to see how many internal queues we have
390 my($i) = 0;
391 while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
392 # see if the queue has been removed
393 if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
394 # delete the queues rrd
395 unlink "$rrddir/$machine/$hash\_$i.rrd";
396 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
397 # are there any other rrd's left on this queue? if not, cleanup.
398 # get a list of any that may be still there..
399 opendir(DIR, "$rrddir/$machine");
400 my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
401 closedir DIR;
402 # count them (+1 because an empty array is size -1)
403 my($rrdcount) = $#rrdcountfiles + 1;
404 if($rrdcount == 0) {
405 # clean up the def file and any images
406 unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
407 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
408 }
409 ++$i;
410 next;
411 }
412 # the &updaterrd will also do this check, but we want
413 # to write our def file out first
414 if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
415 open(DEF, ">$rrddir/$machine/$hash.def");
416 print DEF $name;
417 close DEF;
418 &log("created $rrddir/$machine/$hash.def\n");
419 }
420 my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
421 "packet.queue.attributes.total:total:COUNTER",
422 );
423 &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
424 ++$i;
425 }
426 }
427 else {
428 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
429 }
430 }
431
432 # we'll now return from this sub and reconnect
433 &error("Data channel socket gave no data, bailing out...\n");
434 }
435
436 #
437 # sub to update an rrd file
438 #
439 # $machine = name of the machine
440 # (eg. kernow.ukc.ac.uk)
441 # $type = the type of graph for the machine
442 # (eg. cpu)
443 # $date = the date of the item we want to add
444 # (in seconds since the epoch)
445 # $step = the interval at which the database steps
446 # used when we create a new rrd
447 # $xmlref = reference to the xml data packet
448 # @data = array containing data items to add
449 # (eg. "packet.cpu.user:user:GAUGE")
450 #
451 sub updaterrd() {
452 my($machine, $type, $date, $step, $xmlref, @data) = @_;
453 # get hold of the xmlhash we have a reference to
454 my(%xmlhash) = %$xmlref;
455 # check if we need to create a new rrd
456 if( ! -f "$rrddir/$machine/$type.rrd") {
457 my(@createdata);
458 # pull the details out of the data we've been given
459 foreach my $dataitem (@data) {
460 if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
461 push @createdata, "$1:$2";
462 }
463 }
464 # call the &makerrd to actually create the rrd
465 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
466 &makerrd($machine, $type, $date, $step, @createdata);
467 }
468 # get the details out of the data we've been given
469 my($updateparams) = "$date";
470 foreach my $dataitem (@data) {
471 if($dataitem =~ /^(\S+):\S+:\S+$/) {
472 # pull the values straight out of the xmlhash
473 my($value) = $xmlhash{$1};
474 # if it's undefined we'll set it to 0
475 # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
476 $value = "0" if not defined $value;
477 $updateparams .= ":$value";
478 }
479 }
480 # perform the update
481 RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
482 &log("updating $rrddir/$machine/$type.rrd\n");
483 my($err) = RRDs::error;
484 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
485 }
486
487 #
488 # sub to create a new rrd file
489 #
490 # $machine = name of the machine
491 # (eg. kernow.ukc.ac.uk)
492 # $type = the type of graph for the machine
493 # (eg. cpu)
494 # $start = the date of the first item we want to add
495 # (in seconds since the epoch)
496 # $step = the interval at which the database steps
497 # @data = the data items we want to put in the rrd
498 # in the form: $dsname:dstype
499 # (eg. "size:GAUGE")
500 #
501 sub makerrd() {
502 my($machine, $type, $start, $step, @data) = @_;
503 # check if directory exists for rrd
504 if(! -d "$rrddir/$machine") {
505 # not sure on this umask, but it seems to work?
506 mkdir "$rrddir/$machine", 0777;
507 &log("created directory $rrddir/$machine\n");
508 }
509 my(@rrdcmd);
510 # we'll want to add our first data item at $start,
511 # so we start our rrd $step before that.
512 $start -= $step;
513 push @rrdcmd, "$rrddir/$machine/$type.rrd";
514 push @rrdcmd, "--start=$start";
515 push @rrdcmd, "--step=$step";
516 foreach my $dataitem (@data) {
517 # dataitem should be: "dsname:dstype"
518 if($dataitem =~ /^(\S+):(\S+)$/) {
519 push @rrdcmd, "DS:$1:$2:600:U:U";
520 }
521 }
522 push @rrdcmd, (
523 # 3h in 15s samples
524 "RRA:AVERAGE:0.5:1:720",
525 "RRA:MAX:0.5:1:720",
526 # 1d in 2m samples
527 "RRA:AVERAGE:0.5:8:720",
528 "RRA:MAX:0.5:8:720",
529 # 1w in 15m samples
530 "RRA:AVERAGE:0.5:60:672",
531 "RRA:MAX:0.5:60:672",
532 # 1m in 1hr samples
533 "RRA:AVERAGE:0.5:240:744",
534 "RRA:MAX:0.5:240:744",
535 # 1y in 12hr samples
536 "RRA:AVERAGE:0.5:2880:730",
537 "RRA:MAX:0.5:2880:730",
538 );
539 RRDs::create (@rrdcmd);
540 my($err) = RRDs::error;
541 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
542 }
543
544 # prints out usage information then exits
545 sub usage() {
546 print "Usage: watch.pl [options] i-scream_client_interface port\n";
547 print "Options\n";
548 print " -c config Specifies the configuration file\n";
549 print " default: rrdgraphing.conf\n";
550 print " -v Be verbose about what's happening\n";
551 print " -q Be quiet, even supress errors\n";
552 print " -V Print version number\n";
553 print " -h Prints this help page\n";
554 exit(1);
555 }
556
557 # prints a log message if verbose is turned on
558 sub log() {
559 my($msg) = @_;
560 print $msg if $verbose;
561 }
562
563 # prints an error message unless quiet is turned on
564 sub error() {
565 my($msg) = @_;
566 print STDERR $msg unless $quiet;
567 }