--- projects/cms/source/reports/rrdgraphing/watch.pl 2002/03/18 13:24:31 1.1 +++ projects/cms/source/reports/rrdgraphing/watch.pl 2005/06/15 15:39:25 1.11 @@ -1,60 +1,118 @@ #!/usr/bin/perl -w +# +# i-scream central monitoring system +# http://www.i-scream.org +# Copyright (C) 2000-2002 i-scream +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# + # ----------------------------------------------------------- # i-scream graph generation scripts -# http://www.i-scream.org.uk +# http://www.i-scream.org # # Generates rrd databases for i-scream data by connecting to # the i-scream server and collecting data. # # $Author: tdb $ -# $Id: watch.pl,v 1.1 2002/03/18 13:24:31 tdb Exp $ +# $Id: watch.pl,v 1.11 2005/06/15 15:39:25 tdb Exp $ #------------------------------------------------------------ -## TODO -# ought to think about cleaning up when we restart? -# -- old queue data etc +my($version) = '$Id: watch.pl,v 1.11 2005/06/15 15:39:25 tdb Exp $'; $| = 1; use strict; -use iscream::XMLParser; +use Getopt::Std; use IO::Socket; +use iscream::XMLParser; use RRDs; -# Base directory for images -# (a directory will be constructed for each host under this) -my($imgdir) = "/home/pkg/iscream/public_html/graphs"; +# define variables that will be read from the config +# nb. keep this insync with the config file! +use vars qw{ + $imgdir $rrddir + $maxrrdage $maximgage $deleterrds $deleteimgs + $hex_slash $hex_underscore $hex_space $hex_colon $hex_bslash $hex_rbracket + $hex_lbracket $hex_plus + $rrdstep $retry_wait + $verbose $quiet +}; -# Location of RRD databases -my($rrddir) = "/u1/i-scream/databases"; +# default locate of the config file +my($configfile) = "rrdgraphing.conf"; -# for reference: -# ch -> hex: $hex = sprintf("%02x", ord($ch)); -# hex -> ch: $ch = chr(hex($hex)); +# check for command line arguments +my(%opts); +my($ret) = getopts('hvqVc:', \%opts); -# / converted to a decimal then hex'd -my($hex_slash) = "_2f"; -# _ converted to a decimal then hex'd -my($hex_underscore) = "_5f"; +# if invalid argument given, $ret will not be 1 +&usage() if $ret != 1; -# step interval in the rrd databases -my($rrdstep) = 15; +# first process the arguments which might mean we exit now -# time to wait (in seconds) before retrying a connection -my($retry_wait) = 10; +# -h is usage +if($opts{h}) { + &usage(); +} +# -V is version +if($opts{V}) { + print "watch.pl version: $version\n"; + exit(1); +} +# Then try getting the config + +# -c specifies the config file location +if($opts{c}) { + $configfile = $opts{c}; +} +# suck in the config +&log("reading config from $configfile\n"); +do $configfile; + +# Then any options we might want to override the config with + +# -v is verbose +if($opts{v}) { + $verbose = $opts{v}; +} +# -q is verbose +if($opts{q}) { + $quiet = $opts{q}; + # if we're meant to be quiet, we can hardly be verbose! +# $verbose = 0; +} + +# Finally check for required arguments + +# check we still have two arguments left if (@ARGV != 2) { - die "Usage: watch.pl [i-scream client interface] [TCP port]\n"; + &usage(); } # user supplied client interface server and port my($addr) = $ARGV[0]; my($cport) = $ARGV[1]; + +# Main program loop while(1) { - print "Connecting control channel to port $cport on $addr...\n"; + &log("Connecting control channel to port $cport on $addr...\n"); # attempt to connect the control channel my($csock) = new IO::Socket::INET( @@ -65,8 +123,8 @@ while(1) { # if socket isn't defined connection failed if (!defined $csock) { - print STDERR "ERROR: Could not connect control channel to $addr:$cport.\n"; - print STDERR "Please check that there is an i-scream server at this address.\n"; + &error("ERROR: Could not connect control channel to $addr:$cport.\n"); + &error("Please check that there is an i-scream server at this address.\n"); &wait_then_retry(); next; } @@ -76,8 +134,9 @@ while(1) { # client interface should send it's protocol ID # we know about "PROTOCOL 1.1", and will only accept the same $response = <$csock>; + &log("CLI sent: $response"); if ($response && $response ne "PROTOCOL 1.1\n") { - print STDERR "The i-scream server sent an unexpected protocol ID: $response\n"; + &error("The i-scream server sent an unexpected protocol ID: $response\n"); close($csock); &wait_then_retry(); next; @@ -85,9 +144,11 @@ while(1) { # send our identifier to the client interface print $csock "rrdgraphing\n"; + &log("we sent: rrdgraphing\n"); $response = <$csock>; + &log("CLI sent: $response"); if ($response && $response ne "OK\n") { - print STDERR "Received unexpected response: $response\n"; + &error("Received unexpected response: $response\n"); close($csock); &wait_then_retry(); next; @@ -95,13 +156,15 @@ while(1) { # tell the client interface we'd like to start the data channel print $csock "STARTDATA\n"; - + &log("we sent: STARTDATA\n"); + # the response should be the socket to connect the data channel to $response = <$csock>; + &log("CLI sent: $response"); chomp $response; my($dport) = $response; - print "Connecting data channel to port $dport on $addr...\n"; + &log("Connecting data channel to port $dport on $addr...\n"); # attempt to connect the data channel my($dsock) = new IO::Socket::INET( @@ -112,8 +175,8 @@ while(1) { # if socket isn't defined connection failed if (!defined $dsock) { - print STDERR "ERROR: Could not connect data channel to $addr:$dport.\n"; - print STDERR "Failure in communications.\n"; + &error("ERROR: Could not connect data channel to $addr:$dport.\n"); + &error("Failure in communications.\n"); close($csock); &wait_then_retry(); next; @@ -140,7 +203,7 @@ exit 0; # wait for a while before retrying # sub wait_then_retry() { - print STDERR "Will retry connection to i-scream server in $retry_wait seconds.\n\n"; + &error("Will retry connection to i-scream server in $retry_wait seconds.\n\n"); sleep $retry_wait; } @@ -167,7 +230,7 @@ sub processdata() { # attempt to parse the data my($err, %xmlhash) = &iscream::XMLParser::parse($xml); if($err) { - print STDERR "Skipped, XML did not parse: $xml"; + &error("Skipped, XML did not parse: $xml"); next; } @@ -180,6 +243,7 @@ sub processdata() { if(! -d "$rrddir/$machine") { # not sure on this umask, but it seems to work? mkdir "$rrddir/$machine", 0777; + &log("created directory $rrddir/$machine\n"); } my(@data); @@ -193,9 +257,14 @@ sub processdata() { ); &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data); + # uptime + @data = ( "packet.os.uptime:uptime:GAUGE" ); + &updaterrd($machine, "uptime", $date, $rrdstep, \%xmlhash, @data); + # mem @data = ( "packet.memory.free:free:GAUGE", "packet.memory.total:total:GAUGE", + "packet.memory.cache:cache:GAUGE", ); &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data); @@ -225,19 +294,76 @@ sub processdata() { @data = ( "packet.users.count:count:GAUGE", ); &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data); - + + # paging + @data = ( "packet.pages.pageins:pageins:GAUGE", + "packet.pages.pageouts:pageouts:GAUGE", + ); + &updaterrd($machine, "paging", $date, $rrdstep, \%xmlhash, @data); + # disk my($i) = 0; while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) { my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"}; $mount =~ s/_/$hex_underscore/g; $mount =~ s/\//$hex_slash/g; - @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE", + $mount =~ s/\\/$hex_bslash/g; + $mount =~ s/ /$hex_space/g; + $mount =~ s/:/$hex_colon/g; + @data = ( "packet.disk.p$i.attributes.total:total:GAUGE", "packet.disk.p$i.attributes.used:used:GAUGE", + "packet.disk.p$i.attributes.totalinodes:totalinodes:GAUGE", + "packet.disk.p$i.attributes.freeinodes:freeinodes:GAUGE", ); &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data); ++$i; } + + # diskio + $i = 0; + while(defined $xmlhash{"packet.diskio.p$i.attributes.name"}) { + my($name) = $xmlhash{"packet.diskio.p$i.attributes.name"}; + $name =~ s/_/$hex_underscore/g; + $name =~ s/\//$hex_slash/g; + $name =~ s/\\/$hex_bslash/g; + $name =~ s/ /$hex_space/g; + $name =~ s/:/$hex_colon/g; + @data = ( "packet.diskio.p$i.attributes.rbytes:rbytes:GAUGE", + "packet.diskio.p$i.attributes.wbytes:wbytes:GAUGE", + ); + &updaterrd($machine, "diskio-$name", $date, $rrdstep, \%xmlhash, @data); + ++$i + } + + # net + $i = 0; + while(defined $xmlhash{"packet.net.p$i.attributes.name"}) { + my($name) = $xmlhash{"packet.net.p$i.attributes.name"}; + $name =~ s/_/$hex_underscore/g; + $name =~ s/\//$hex_slash/g; + $name =~ s/\\/$hex_bslash/g; + $name =~ s/ /$hex_space/g; + $name =~ s/:/$hex_colon/g; + $name =~ s/\(/$hex_lbracket/g; + $name =~ s/\)/$hex_rbracket/g; + $name =~ s/\+/$hex_plus/g; + @data = ( "packet.net.p$i.attributes.rx:rx:GAUGE", + "packet.net.p$i.attributes.tx:tx:GAUGE", + ); + &updaterrd($machine, "net-$name", $date, $rrdstep, \%xmlhash, @data); + ++$i + } + + # mailq + $i = 0; + while(defined $xmlhash{"packet.mailq.p$i.attributes.name"}) { + my($name) = $xmlhash{"packet.mailq.p$i.attributes.name"}; + $name =~ s/\s+//g; + @data = ( "packet.mailq.p$i.attributes.size:size:GAUGE", + ); + &updaterrd($machine, "mailq-$name", $date, $rrdstep, \%xmlhash, @data); + ++$i + } } # queue statistics packet @@ -248,6 +374,7 @@ sub processdata() { if(! -d "$rrddir/$machine") { # not sure on this umask, but it seems to work? mkdir "$rrddir/$machine", 0777; + &log("created directory $rrddir/$machine\n"); } my($hash) = $xmlhash{"packet.attributes.hashCode"}; my($date) = $xmlhash{"packet.attributes.date"}; @@ -255,6 +382,7 @@ sub processdata() { # take a look to see if we have a shutdown packet... if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") { unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>; + &log("deleted $rrddir/$machine/$hash\_*.rrd $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n"); next; } # look through to see how many internal queues we have @@ -264,6 +392,7 @@ sub processdata() { if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") { # delete the queues rrd unlink "$rrddir/$machine/$hash\_$i.rrd"; + &log("deleted $rrddir/$machine/$hash\_$i.rrd\n"); # are there any other rrd's left on this queue? if not, cleanup. # get a list of any that may be still there.. opendir(DIR, "$rrddir/$machine"); @@ -274,6 +403,7 @@ sub processdata() { if($rrdcount == 0) { # clean up the def file and any images unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>; + &log("deleted $rrddir/$machine/$hash.def $imgdir/$machine/$hash*.png\n"); } ++$i; next; @@ -284,6 +414,7 @@ sub processdata() { open(DEF, ">$rrddir/$machine/$hash.def"); print DEF $name; close DEF; + &log("created $rrddir/$machine/$hash.def\n"); } my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE", "packet.queue.attributes.total:total:COUNTER", @@ -293,12 +424,12 @@ sub processdata() { } } else { - #print STDERR "SKIPPED: valid xml, but not a data or statistics packet\n"; + #&error("SKIPPED: valid xml, but not a data or statistics packet\n"); } } # we'll now return from this sub and reconnect - print STDERR "Data channel socket gave no data, bailing out...\n"; + &error("Data channel socket gave no data, bailing out...\n"); } # @@ -330,7 +461,7 @@ sub updaterrd() { } } # call the &makerrd to actually create the rrd - print "making new rrd for $rrddir/$machine/$type.rrd\n"; + &log("making new rrd for $rrddir/$machine/$type.rrd\n"); &makerrd($machine, $type, $date, $step, @createdata); } # get the details out of the data we've been given @@ -347,8 +478,9 @@ sub updaterrd() { } # perform the update RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams); + &log("updating $rrddir/$machine/$type.rrd\n"); my($err) = RRDs::error; - print STDERR "Error updating $rrddir/$machine/$type.rrd: $err\n" if $err; + &error("Error updating $rrddir/$machine/$type.rrd: $err\n") if $err; } # @@ -371,6 +503,7 @@ sub makerrd() { if(! -d "$rrddir/$machine") { # not sure on this umask, but it seems to work? mkdir "$rrddir/$machine", 0777; + &log("created directory $rrddir/$machine\n"); } my(@rrdcmd); # we'll want to add our first data item at $start, @@ -404,5 +537,30 @@ sub makerrd() { ); RRDs::create (@rrdcmd); my($err) = RRDs::error; - print STDERR "Error creating rrd for $rrddir/$machine/$type: $err\n" if $err; + &error("Error creating rrd for $rrddir/$machine/$type: $err\n") if $err; +} + +# prints out usage information then exits +sub usage() { + print "Usage: watch.pl [options] i-scream_client_interface port\n"; + print "Options\n"; + print " -c config Specifies the configuration file\n"; + print " default: rrdgraphing.conf\n"; + print " -v Be verbose about what's happening\n"; + print " -q Be quiet, even supress errors\n"; + print " -V Print version number\n"; + print " -h Prints this help page\n"; + exit(1); +} + +# prints a log message if verbose is turned on +sub log() { + my($msg) = @_; + print $msg if $verbose; +} + +# prints an error message unless quiet is turned on +sub error() { + my($msg) = @_; + print STDERR $msg unless $quiet; }