ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.3
Committed: Tue May 21 15:01:43 2002 UTC (22 years, 5 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.2: +116 -41 lines
Log Message:
Changed to have a shared config file. Also parses command line options to
make it more verbose or quiet.

File Contents

# Content
1 #!/usr/bin/perl -w
2
3 #
4 # i-scream central monitoring system
5 # Copyright (C) 2000-2002 i-scream
6 #
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 #
21
22 # -----------------------------------------------------------
23 # i-scream graph generation scripts
24 # http://www.i-scream.org.uk
25 #
26 # Generates rrd databases for i-scream data by connecting to
27 # the i-scream server and collecting data.
28 #
29 # $Author: tdb $
30 # $Id: watch.pl,v 1.2 2002/05/18 18:15:59 tdb Exp $
31 #------------------------------------------------------------
32
33 my($version) = '$Id$';
34
35 $| = 1;
36
37 use strict;
38 use Getopt::Std;
39 use IO::Socket;
40 use iscream::XMLParser;
41 use RRDs;
42
43 # define variables that will be read from the config
44 # nb. keep this insync with the config file!
45 use vars qw{
46 $imgdir $rrddir
47 $maxrrdage $maximgage $deleterrds $deleteimgs
48 $hex_slash $hex_underscore
49 $rrdstep $retry_wait
50 $verbose $quiet
51 };
52
53 # default locate of the config file
54 my($configfile) = "rrdgraphing.conf";
55
56 # check for command line arguments
57 my(%opts);
58 my($ret) = getopts('hvqVc:', \%opts);
59
60 # if invalid argument given, $ret will not be 1
61 &usage() if $ret != 1;
62
63 # first process the arguments which might mean we exit now
64
65 # -h is usage
66 if($opts{h}) {
67 &usage();
68 }
69 # -V is version
70 if($opts{V}) {
71 print "watch.pl version: $version\n";
72 exit(1);
73 }
74
75 # Then try getting the config
76
77 # -c specifies the config file location
78 if($opts{c}) {
79 $configfile = $opts{c};
80 }
81 # suck in the config
82 &log("reading config from $configfile\n");
83 do $configfile;
84
85 # Then any options we might want to override the config with
86
87 # -v is verbose
88 if($opts{v}) {
89 $verbose = $opts{v};
90 }
91 # -q is verbose
92 if($opts{q}) {
93 $quiet = $opts{q};
94 # if we're meant to be quiet, we can hardly be verbose!
95 # $verbose = 0;
96 }
97
98 # Finally check for required arguments
99
100 # check we still have two arguments left
101 if (@ARGV != 2) {
102 &usage();
103 }
104
105 # user supplied client interface server and port
106 my($addr) = $ARGV[0];
107 my($cport) = $ARGV[1];
108
109
110 # Main program loop
111 while(1) {
112
113 &log("Connecting control channel to port $cport on $addr...\n");
114
115 # attempt to connect the control channel
116 my($csock) = new IO::Socket::INET(
117 PeerAddr => $addr,
118 PeerPort => $cport,
119 Proto => 'tcp'
120 );
121
122 # if socket isn't defined connection failed
123 if (!defined $csock) {
124 &error("ERROR: Could not connect control channel to $addr:$cport.\n");
125 &error("Please check that there is an i-scream server at this address.\n");
126 &wait_then_retry();
127 next;
128 }
129
130 my($response);
131
132 # client interface should send it's protocol ID
133 # we know about "PROTOCOL 1.1", and will only accept the same
134 $response = <$csock>;
135 &log("CLI sent: $response");
136 if ($response && $response ne "PROTOCOL 1.1\n") {
137 &error("The i-scream server sent an unexpected protocol ID: $response\n");
138 close($csock);
139 &wait_then_retry();
140 next;
141 }
142
143 # send our identifier to the client interface
144 print $csock "rrdgraphing\n";
145 &log("we sent: rrdgraphing\n");
146 $response = <$csock>;
147 &log("CLI sent: $response");
148 if ($response && $response ne "OK\n") {
149 &error("Received unexpected response: $response\n");
150 close($csock);
151 &wait_then_retry();
152 next;
153 }
154
155 # tell the client interface we'd like to start the data channel
156 print $csock "STARTDATA\n";
157 &log("we sent: STARTDATA\n");
158
159 # the response should be the socket to connect the data channel to
160 $response = <$csock>;
161 &log("CLI sent: $response");
162 chomp $response;
163
164 my($dport) = $response;
165 &log("Connecting data channel to port $dport on $addr...\n");
166
167 # attempt to connect the data channel
168 my($dsock) = new IO::Socket::INET(
169 PeerAddr => $addr,
170 PeerPort => $dport,
171 Proto => 'tcp'
172 ) or die "arse?";
173
174 # if socket isn't defined connection failed
175 if (!defined $dsock) {
176 &error("ERROR: Could not connect data channel to $addr:$dport.\n");
177 &error("Failure in communications.\n");
178 close($csock);
179 &wait_then_retry();
180 next;
181 }
182
183 # the data channel should now be sending us data!
184
185 # call sub to process data being received over the data channel
186 &processdata($dsock);
187
188 # data processing has stopped, close sockets
189 close($csock);
190 close($dsock);
191
192 # wait before retrying
193 &wait_then_retry();
194 }
195
196 # we'll never reach here... unless 1 becomes false for some reason ;)
197 exit 0;
198
199
200 #
201 # wait for a while before retrying
202 #
203 sub wait_then_retry() {
204 &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n");
205 sleep $retry_wait;
206 }
207
208 #
209 # Given the socket of the data channel will process all
210 # the incoming XML data, creating and updating the appropriate
211 # database files.
212 #
213 # $dsock = socket connected to the data channel
214 #
215 sub processdata() {
216 # the socket connected to the data channel
217 my($dsock) = @_;
218 # save us recreating this variable each time we loop
219 my($xml);
220
221 while(1) {
222 # read data
223 $xml = <$dsock>;
224
225 # something odd has happened
226 last if not defined $xml;
227
228 # attempt to parse the data
229 my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
230 if($err) {
231 &error("Skipped, XML did not parse: $xml");
232 next;
233 }
234
235 # standard data packet
236 if($xmlhash{"packet.attributes.type"} eq "data") {
237 my($machine) = $xmlhash{"packet.attributes.machine_name"};
238 my($date) = $xmlhash{"packet.attributes.date"};
239
240 # make directory for machine
241 if(! -d "$rrddir/$machine") {
242 # not sure on this umask, but it seems to work?
243 mkdir "$rrddir/$machine", 0777;
244 &log("created directory $rrddir/$machine\n");
245 }
246
247 my(@data);
248
249 # cpu
250 @data = ( "packet.cpu.idle:idle:GAUGE",
251 "packet.cpu.user:user:GAUGE",
252 "packet.cpu.kernel:kernel:GAUGE",
253 "packet.cpu.swap:swap:GAUGE",
254 "packet.cpu.iowait:iowait:GAUGE",
255 );
256 &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
257
258 # mem
259 @data = ( "packet.memory.free:free:GAUGE",
260 "packet.memory.total:total:GAUGE",
261 );
262 &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
263
264 # load
265 @data = ( "packet.load.load1:load1:GAUGE",
266 "packet.load.load5:load5:GAUGE",
267 "packet.load.load15:load15:GAUGE",
268 );
269 &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
270
271 # processes
272 @data = ( "packet.processes.cpu:cpu:GAUGE",
273 "packet.processes.sleeping:sleeping:GAUGE",
274 "packet.processes.stopped:stopped:GAUGE",
275 "packet.processes.total:total:GAUGE",
276 "packet.processes.zombie:zombie:GAUGE",
277 );
278 &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
279
280 # swap
281 @data = ( "packet.swap.free:free:GAUGE",
282 "packet.swap.total:total:GAUGE",
283 );
284 &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
285
286 # users
287 @data = ( "packet.users.count:count:GAUGE",
288 );
289 &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
290
291 # disk
292 my($i) = 0;
293 while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
294 my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
295 $mount =~ s/_/$hex_underscore/g;
296 $mount =~ s/\//$hex_slash/g;
297 @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
298 "packet.disk.p$i.attributes.used:used:GAUGE",
299 );
300 &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
301 ++$i;
302 }
303 }
304
305 # queue statistics packet
306 elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
307 # psuedo machine for internal server stuff
308 my($machine) = "i-scream-server";
309 # make directory
310 if(! -d "$rrddir/$machine") {
311 # not sure on this umask, but it seems to work?
312 mkdir "$rrddir/$machine", 0777;
313 &log("created directory $rrddir/$machine\n");
314 }
315 my($hash) = $xmlhash{"packet.attributes.hashCode"};
316 my($date) = $xmlhash{"packet.attributes.date"};
317 my($name) = $xmlhash{"packet.attributes.name"};
318 # take a look to see if we have a shutdown packet...
319 if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
320 unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
321 &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
322 next;
323 }
324 # look through to see how many internal queues we have
325 my($i) = 0;
326 while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
327 # see if the queue has been removed
328 if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
329 # delete the queues rrd
330 unlink "$rrddir/$machine/$hash\_$i.rrd";
331 &log("deleted $rrddir/$machine/$hash\_$i.rrd\n");
332 # are there any other rrd's left on this queue? if not, cleanup.
333 # get a list of any that may be still there..
334 opendir(DIR, "$rrddir/$machine");
335 my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
336 closedir DIR;
337 # count them (+1 because an empty array is size -1)
338 my($rrdcount) = $#rrdcountfiles + 1;
339 if($rrdcount == 0) {
340 # clean up the def file and any images
341 unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
342 &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n");
343 }
344 ++$i;
345 next;
346 }
347 # the &updaterrd will also do this check, but we want
348 # to write our def file out first
349 if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
350 open(DEF, ">$rrddir/$machine/$hash.def");
351 print DEF $name;
352 close DEF;
353 &log("created $rrddir/$machine/$hash.def\n");
354 }
355 my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
356 "packet.queue.attributes.total:total:COUNTER",
357 );
358 &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
359 ++$i;
360 }
361 }
362 else {
363 #&error("SKIPPED: valid xml, but not a data or statistics packet\n");
364 }
365 }
366
367 # we'll now return from this sub and reconnect
368 &error("Data channel socket gave no data, bailing out...\n");
369 }
370
371 #
372 # sub to update an rrd file
373 #
374 # $machine = name of the machine
375 # (eg. kernow.ukc.ac.uk)
376 # $type = the type of graph for the machine
377 # (eg. cpu)
378 # $date = the date of the item we want to add
379 # (in seconds since the epoch)
380 # $step = the interval at which the database steps
381 # used when we create a new rrd
382 # $xmlref = reference to the xml data packet
383 # @data = array containing data items to add
384 # (eg. "packet.cpu.user:user:GAUGE")
385 #
386 sub updaterrd() {
387 my($machine, $type, $date, $step, $xmlref, @data) = @_;
388 # get hold of the xmlhash we have a reference to
389 my(%xmlhash) = %$xmlref;
390 # check if we need to create a new rrd
391 if( ! -f "$rrddir/$machine/$type.rrd") {
392 my(@createdata);
393 # pull the details out of the data we've been given
394 foreach my $dataitem (@data) {
395 if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
396 push @createdata, "$1:$2";
397 }
398 }
399 # call the &makerrd to actually create the rrd
400 &log("making new rrd for $rrddir/$machine/$type.rrd\n");
401 &makerrd($machine, $type, $date, $step, @createdata);
402 }
403 # get the details out of the data we've been given
404 my($updateparams) = "$date";
405 foreach my $dataitem (@data) {
406 if($dataitem =~ /^(\S+):\S+:\S+$/) {
407 # pull the values straight out of the xmlhash
408 my($value) = $xmlhash{$1};
409 # if it's undefined we'll set it to 0
410 # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
411 $value = "0" if not defined $value;
412 $updateparams .= ":$value";
413 }
414 }
415 # perform the update
416 RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
417 &log("updating $rrddir/$machine/$type.rrd\n");
418 my($err) = RRDs::error;
419 &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err;
420 }
421
422 #
423 # sub to create a new rrd file
424 #
425 # $machine = name of the machine
426 # (eg. kernow.ukc.ac.uk)
427 # $type = the type of graph for the machine
428 # (eg. cpu)
429 # $start = the date of the first item we want to add
430 # (in seconds since the epoch)
431 # $step = the interval at which the database steps
432 # @data = the data items we want to put in the rrd
433 # in the form: $dsname:dstype
434 # (eg. "size:GAUGE")
435 #
436 sub makerrd() {
437 my($machine, $type, $start, $step, @data) = @_;
438 # check if directory exists for rrd
439 if(! -d "$rrddir/$machine") {
440 # not sure on this umask, but it seems to work?
441 mkdir "$rrddir/$machine", 0777;
442 &log("created directory $rrddir/$machine\n");
443 }
444 my(@rrdcmd);
445 # we'll want to add our first data item at $start,
446 # so we start our rrd $step before that.
447 $start -= $step;
448 push @rrdcmd, "$rrddir/$machine/$type.rrd";
449 push @rrdcmd, "--start=$start";
450 push @rrdcmd, "--step=$step";
451 foreach my $dataitem (@data) {
452 # dataitem should be: "dsname:dstype"
453 if($dataitem =~ /^(\S+):(\S+)$/) {
454 push @rrdcmd, "DS:$1:$2:600:U:U";
455 }
456 }
457 push @rrdcmd, (
458 # 3h in 15s samples
459 "RRA:AVERAGE:0.5:1:720",
460 "RRA:MAX:0.5:1:720",
461 # 1d in 2m samples
462 "RRA:AVERAGE:0.5:8:720",
463 "RRA:MAX:0.5:8:720",
464 # 1w in 15m samples
465 "RRA:AVERAGE:0.5:60:672",
466 "RRA:MAX:0.5:60:672",
467 # 1m in 1hr samples
468 "RRA:AVERAGE:0.5:240:744",
469 "RRA:MAX:0.5:240:744",
470 # 1y in 12hr samples
471 "RRA:AVERAGE:0.5:2880:730",
472 "RRA:MAX:0.5:2880:730",
473 );
474 RRDs::create (@rrdcmd);
475 my($err) = RRDs::error;
476 &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err;
477 }
478
479 # prints out usage information then exits
480 sub usage() {
481 print "Usage: watch.pl [options] i-scream_client_interface port\n";
482 print "Options\n";
483 print " -c config Specifies the configuration file\n";
484 print " default: rrdgraphing.conf\n";
485 print " -v Be verbose about what's happening\n";
486 print " -q Be quiet, even supress errors\n";
487 print " -V Print version number\n";
488 print " -h Prints this help page\n";
489 exit(1);
490 }
491
492 # prints a log message if verbose is turned on
493 sub log() {
494 my($msg) = @_;
495 print $msg if $verbose;
496 }
497
498 # prints an error message unless quiet is turned on
499 sub error() {
500 my($msg) = @_;
501 print STDERR $msg unless $quiet;
502 }