ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.5
Committed: Mon Oct 21 13:02:58 2002 UTC (22 years ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.4: +11 -3 lines
Log Message:
Add support for disk inode usage, and paging activity. Have added to both
the latest data page, and to the graphs sections. Also reworked the memory,
swap, and disk graphing to be percentage based.

File Contents

# Content
1 #!/usr/bin/perl -w
2
3 #
4 # i-scream central monitoring system
5 # http://www.i-scream.org.uk
6 # Copyright (C) 2000-2002 i-scream
7 #
8 # This program is free software; you can redistribute it and/or
9 # modify it under the terms of the GNU General Public License
10 # as published by the Free Software Foundation; either version 2
11 # of the License, or (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21 #
22
23 # -----------------------------------------------------------
24 # i-scream graph generation scripts
25 # http://www.i-scream.org.uk
26 #
27 # Generates rrd databases for i-scream data by connecting to
28 # the i-scream server and collecting data.
29 #
30 # $Author: tdb $
31 # $Id: watch.pl,v 1.4 2002/05/21 16:47:16 tdb Exp $
32 #------------------------------------------------------------
33
34 my($version) = '$Id: watch.pl,v 1.4 2002/05/21 16:47:16 tdb Exp $';
35
36 $| = 1;
37
38 use strict;
39 use Getopt::Std;
40 use IO::Socket;
41 use iscream::XMLParser;
42 use RRDs;
43
44 # define variables that will be read from the config
45 # nb. keep this insync with the config file!
46 use vars qw{
47 $imgdir $rrddir
48 $maxrrdage $maximgage $deleterrds $deleteimgs
49 $hex_slash $hex_underscore
50 $rrdstep $retry_wait
51 $verbose $quiet
52 };
53
54 # default locate of the config file
55 my($configfile) = "rrdgraphing.conf";
56
57 # check for command line arguments
58 my(%opts);
59 my($ret) = getopts('hvqVc:', \%opts);
60
61 # if invalid argument given, $ret will not be 1
62 &usage() if $ret != 1;
63
64 # first process the arguments which might mean we exit now
65
66 # -h is usage
67 if($opts{h}) {
68 &usage();
69 }
70 # -V is version
71 if($opts{V}) {
72 print "watch.pl version: $version\n";
73 exit(1);
74 }
75
76 # Then try getting the config
77
78 # -c specifies the config file location
79 if($opts{c}) {
80 $configfile = $opts{c};
81 }
82 # suck in the config
83 &log("reading config from $configfile\n");
84 do $configfile;
85
86 # Then any options we might want to override the config with
87
88 # -v is verbose
89 if($opts{v}) {
90 $verbose = $opts{v};
91 }
92 # -q is verbose
93 if($opts{q}) {
94 $quiet = $opts{q};
95 # if we're meant to be quiet, we can hardly be verbose!
96 # $verbose = 0;
97 }
98
99 # Finally check for required arguments
100
101 # check we still have two arguments left
102 if (@ARGV != 2) {
103 &usage();
104 }
105
106 # user supplied client interface server and port
107 my($addr) = $ARGV[0];
108 my($cport) = $ARGV[1];
109
110
111 # Main program loop
112 while(1) {
113
114 &log("Connecting control channel to port $cport on $addr...\n");
115
116 # attempt to connect the control channel
117 my($csock) = new IO::Socket::INET(
118 PeerAddr => $addr,
119 PeerPort => $cport,
120 Proto => 'tcp'
121 );
122
123 # if socket isn't defined connection failed
124 if (!defined $csock) {
125 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
126 &error("Please check that there is an i-scream server at this address.\n");
127 &wait_then_retry();
128 next;
129 }
130
131 my($response);
132
133 # client interface should send it's protocol ID
134 # we know about "PROTOCOL 1.1", and will only accept the same
135 $response = <$csock>;
136 &log("CLI sent: $response");
137 if ($response && $response ne "PROTOCOL 1.1\n") {
138 &error("The i-scream server sent an unexpected protocol ID: $response\n");
139 close($csock);
140 &wait_then_retry();
141 next;
142 }
143
144 # send our identifier to the client interface
145 print $csock "rrdgraphing\n";
146 &log("we sent: rrdgraphing\n");
147 $response = <$csock>;
148 &log("CLI sent: $response");
149 if ($response && $response ne "OK\n") {
150 &error("Received unexpected response: $response\n");
151 close($csock);
152 &wait_then_retry();
153 next;
154 }
155
156 # tell the client interface we'd like to start the data channel
157 print $csock "STARTDATA\n";
158 &log("we sent: STARTDATA\n");
159
160 # the response should be the socket to connect the data channel to
161 $response = <$csock>;
162 &log("CLI sent: $response");
163 chomp $response;
164
165 my($dport) = $response;
166 &log("Connecting data channel to port $dport on $addr...\n");
167
168 # attempt to connect the data channel
169 my($dsock) = new IO::Socket::INET(
170 PeerAddr => $addr,
171 PeerPort => $dport,
172 Proto => 'tcp'
173 ) or die "arse?";
174
175 # if socket isn't defined connection failed
176 if (!defined $dsock) {
177 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
178 &error("Failure in communications.\n");
179 close($csock);
180 &wait_then_retry();
181 next;
182 }
183
184 # the data channel should now be sending us data!
185
186 # call sub to process data being received over the data channel
187 &processdata($dsock);
188
189 # data processing has stopped, close sockets
190 close($csock);
191 close($dsock);
192
193 # wait before retrying
194 &wait_then_retry();
195 }
196
197 # we'll never reach here... unless 1 becomes false for some reason ;)
198 exit 0;
199
200
201 #
202 # wait for a while before retrying
203 #
204 sub wait_then_retry() {
205 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
206 sleep $retry_wait;
207 }
208
209 #
210 # Given the socket of the data channel will process all
211 # the incoming XML data, creating and updating the appropriate
212 # database files.
213 #
214 # $dsock = socket connected to the data channel
215 #
216 sub processdata() {
217 # the socket connected to the data channel
218 my($dsock) = @_;
219 # save us recreating this variable each time we loop
220 my($xml);
221
222 while(1) {
223 # read data
224 $xml = <$dsock>;
225
226 # something odd has happened
227 last if not defined $xml;
228
229 # attempt to parse the data
230 my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
231 if($err) {
232 &error("Skipped, XML did not parse: $xml");
233 next;
234 }
235
236 # standard data packet
237 if($xmlhash{"packet.attributes.type"} eq "data") {
238 my($machine) = $xmlhash{"packet.attributes.machine_name"};
239 my($date) = $xmlhash{"packet.attributes.date"};
240
241 # make directory for machine
242 if(! -d "$rrddir/$machine") {
243 # not sure on this umask, but it seems to work?
244 mkdir "$rrddir/$machine", 0777;
245 &log("created directory $rrddir/$machine\n");
246 }
247
248 my(@data);
249
250 # cpu
251 @data = ( "packet.cpu.idle:idle:GAUGE",
252 "packet.cpu.user:user:GAUGE",
253 "packet.cpu.kernel:kernel:GAUGE",
254 "packet.cpu.swap:swap:GAUGE",
255 "packet.cpu.iowait:iowait:GAUGE",
256 );
257 &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
258
259 # mem
260 @data = ( "packet.memory.free:free:GAUGE",
261 "packet.memory.total:total:GAUGE",
262 );
263 &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
264
265 # load
266 @data = ( "packet.load.load1:load1:GAUGE",
267 "packet.load.load5:load5:GAUGE",
268 "packet.load.load15:load15:GAUGE",
269 );
270 &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
271
272 # processes
273 @data = ( "packet.processes.cpu:cpu:GAUGE",
274 "packet.processes.sleeping:sleeping:GAUGE",
275 "packet.processes.stopped:stopped:GAUGE",
276 "packet.processes.total:total:GAUGE",
277 "packet.processes.zombie:zombie:GAUGE",
278 );
279 &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
280
281 # swap
282 @data = ( "packet.swap.free:free:GAUGE",
283 "packet.swap.total:total:GAUGE",
284 );
285 &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
286
287 # users
288 @data = ( "packet.users.count:count:GAUGE",
289 );
290 &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
291
292 # paging
293 @data = ( "packet.pages.swapins:swapins:GAUGE",
294 "packet.pages.swapouts:swapouts:GAUGE",
295 );
296 &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data);
297
298 # disk
299 my($i) = 0;
300 while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
301 my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
302 $mount =~ s/_/$hex_underscore/g;
303 $mount =~ s/\//$hex_slash/g;
304 @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
305 "packet.disk.p$i.attributes.used:used:GAUGE",
306 "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE",
307 "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE",
308 );
309 &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
310 ++$i;
311 }
312 }
313
314 # queue statistics packet
315 elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
316 # psuedo machine for internal server stuff
317 my($machine) = "i-scream-server";
318 # make directory
319 if(! -d "$rrddir/$machine") {
320 # not sure on this umask, but it seems to work?
321 mkdir "$rrddir/$machine", 0777;
322 &log("created directory $rrddir/$machine\n");
323 }
324 my($hash) = $xmlhash{"packet.attributes.hashCode"};
325 my($date) = $xmlhash{"packet.attributes.date"};
326 my($name) = $xmlhash{"packet.attributes.name"};
327 # take a look to see if we have a shutdown packet...
328 if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
329 unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
330 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
331 next;
332 }
333 # look through to see how many internal queues we have
334 my($i) = 0;
335 while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
336 # see if the queue has been removed
337 if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
338 # delete the queues rrd
339 unlink "$rrddir/$machine/$hash\_$i.rrd";
340 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
341 # are there any other rrd's left on this queue? if not, cleanup.
342 # get a list of any that may be still there..
343 opendir(DIR, "$rrddir/$machine");
344 my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
345 closedir DIR;
346 # count them (+1 because an empty array is size -1)
347 my($rrdcount) = $#rrdcountfiles + 1;
348 if($rrdcount == 0) {
349 # clean up the def file and any images
350 unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
351 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
352 }
353 ++$i;
354 next;
355 }
356 # the &updaterrd will also do this check, but we want
357 # to write our def file out first
358 if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
359 open(DEF, ">$rrddir/$machine/$hash.def");
360 print DEF $name;
361 close DEF;
362 &log("created $rrddir/$machine/$hash.def\n");
363 }
364 my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
365 "packet.queue.attributes.total:total:COUNTER",
366 );
367 &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
368 ++$i;
369 }
370 }
371 else {
372 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
373 }
374 }
375
376 # we'll now return from this sub and reconnect
377 &error("Data channel socket gave no data, bailing out...\n");
378 }
379
380 #
381 # sub to update an rrd file
382 #
383 # $machine = name of the machine
384 # (eg. kernow.ukc.ac.uk)
385 # $type = the type of graph for the machine
386 # (eg. cpu)
387 # $date = the date of the item we want to add
388 # (in seconds since the epoch)
389 # $step = the interval at which the database steps
390 # used when we create a new rrd
391 # $xmlref = reference to the xml data packet
392 # @data = array containing data items to add
393 # (eg. "packet.cpu.user:user:GAUGE")
394 #
395 sub updaterrd() {
396 my($machine, $type, $date, $step, $xmlref, @data) = @_;
397 # get hold of the xmlhash we have a reference to
398 my(%xmlhash) = %$xmlref;
399 # check if we need to create a new rrd
400 if( ! -f "$rrddir/$machine/$type.rrd") {
401 my(@createdata);
402 # pull the details out of the data we've been given
403 foreach my $dataitem (@data) {
404 if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
405 push @createdata, "$1:$2";
406 }
407 }
408 # call the &makerrd to actually create the rrd
409 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
410 &makerrd($machine, $type, $date, $step, @createdata);
411 }
412 # get the details out of the data we've been given
413 my($updateparams) = "$date";
414 foreach my $dataitem (@data) {
415 if($dataitem =~ /^(\S+):\S+:\S+$/) {
416 # pull the values straight out of the xmlhash
417 my($value) = $xmlhash{$1};
418 # if it's undefined we'll set it to 0
419 # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
420 $value = "0" if not defined $value;
421 $updateparams .= ":$value";
422 }
423 }
424 # perform the update
425 RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
426 &log("updating $rrddir/$machine/$type.rrd\n");
427 my($err) = RRDs::error;
428 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
429 }
430
431 #
432 # sub to create a new rrd file
433 #
434 # $machine = name of the machine
435 # (eg. kernow.ukc.ac.uk)
436 # $type = the type of graph for the machine
437 # (eg. cpu)
438 # $start = the date of the first item we want to add
439 # (in seconds since the epoch)
440 # $step = the interval at which the database steps
441 # @data = the data items we want to put in the rrd
442 # in the form: $dsname:dstype
443 # (eg. "size:GAUGE")
444 #
445 sub makerrd() {
446 my($machine, $type, $start, $step, @data) = @_;
447 # check if directory exists for rrd
448 if(! -d "$rrddir/$machine") {
449 # not sure on this umask, but it seems to work?
450 mkdir "$rrddir/$machine", 0777;
451 &log("created directory $rrddir/$machine\n");
452 }
453 my(@rrdcmd);
454 # we'll want to add our first data item at $start,
455 # so we start our rrd $step before that.
456 $start -= $step;
457 push @rrdcmd, "$rrddir/$machine/$type.rrd";
458 push @rrdcmd, "--start=$start";
459 push @rrdcmd, "--step=$step";
460 foreach my $dataitem (@data) {
461 # dataitem should be: "dsname:dstype"
462 if($dataitem =~ /^(\S+):(\S+)$/) {
463 push @rrdcmd, "DS:$1:$2:600:U:U";
464 }
465 }
466 push @rrdcmd, (
467 # 3h in 15s samples
468 "RRA:AVERAGE:0.5:1:720",
469 "RRA:MAX:0.5:1:720",
470 # 1d in 2m samples
471 "RRA:AVERAGE:0.5:8:720",
472 "RRA:MAX:0.5:8:720",
473 # 1w in 15m samples
474 "RRA:AVERAGE:0.5:60:672",
475 "RRA:MAX:0.5:60:672",
476 # 1m in 1hr samples
477 "RRA:AVERAGE:0.5:240:744",
478 "RRA:MAX:0.5:240:744",
479 # 1y in 12hr samples
480 "RRA:AVERAGE:0.5:2880:730",
481 "RRA:MAX:0.5:2880:730",
482 );
483 RRDs::create (@rrdcmd);
484 my($err) = RRDs::error;
485 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
486 }
487
488 # prints out usage information then exits
489 sub usage() {
490 print "Usage: watch.pl [options] i-scream_client_interface port\n";
491 print "Options\n";
492 print " -c config Specifies the configuration file\n";
493 print " default: rrdgraphing.conf\n";
494 print " -v Be verbose about what's happening\n";
495 print " -q Be quiet, even supress errors\n";
496 print " -V Print version number\n";
497 print " -h Prints this help page\n";
498 exit(1);
499 }
500
501 # prints a log message if verbose is turned on
502 sub log() {
503 my($msg) = @_;
504 print $msg if $verbose;
505 }
506
507 # prints an error message unless quiet is turned on
508 sub error() {
509 my($msg) = @_;
510 print STDERR $msg unless $quiet;
511 }