--- experimental/reports/graphing/watch.pl 2002/03/10 00:26:24 1.2 +++ experimental/reports/graphing/watch.pl 2002/03/10 01:43:15 1.3 @@ -8,9 +8,13 @@ # the i-scream server and collecting data. # # $Author: tdb $ -# $Id: watch.pl,v 1.2 2002/03/10 00:26:24 tdb Exp $ +# $Id: watch.pl,v 1.3 2002/03/10 01:43:15 tdb Exp $ #------------------------------------------------------------ +## TODO +# ought to think about cleaning up when we restart? +# -- old queue data etc + $| = 1; use strict; @@ -34,206 +38,268 @@ my($hex_slash) = "_2f"; # _ converted to a decimal then hex'd my($hex_underscore) = "_5f"; +# step interval in the rrd databases +my($rrdstep) = 15; + +# time to wait (in seconds) before retrying a connection +my($retry_wait) = 10; + if (@ARGV != 2) { - die "Usage: ihost.pl [i-scream client interface] [TCP port]\n"; + die "Usage: watch.pl [i-scream client interface] [TCP port]\n"; } +# user supplied client interface server and port my($addr) = $ARGV[0]; my($cport) = $ARGV[1]; -my($csock) = new IO::Socket::INET( - PeerAddr => $addr, - PeerPort => $cport, - Proto => 'tcp' - ) or die "Cannot connect!"; - -if (!defined $csock) { - print "ERROR: Could not connect to $addr:$cport.\n"; - print "Please check that there is an i-scream server at this address.\n"; - exit(1); -} - -my($response); - -$response = <$csock>; -if ($response && $response ne "PROTOCOL 1.1\n") { - print "The i-scream server sent an unexpected protocol id: $response\n"; +while(1) { + + print "Connecting control channel to port $cport on $addr...\n"; + + # attempt to connect the control channel + my($csock) = new IO::Socket::INET( + PeerAddr => $addr, + PeerPort => $cport, + Proto => 'tcp' + ); + + # if socket isn't defined connection failed + if (!defined $csock) { + print STDERR "ERROR: Could not connect control channel to $addr:$cport.\n"; + print STDERR "Please check that there is an i-scream server at this address.\n"; + &wait_then_retry(); + next; + } + + my($response); + + # client interface should send it's protocol ID + # we know about "PROTOCOL 1.1", and will only accept the same + $response = <$csock>; + if ($response && $response ne "PROTOCOL 1.1\n") { + print STDERR "The i-scream server sent an unexpected protocol ID: $response\n"; + close($csock); + &wait_then_retry(); + next; + } + + # send our identifier to the client interface + print $csock "rrdgraphing\n"; + $response = <$csock>; + if ($response && $response ne "OK\n") { + print STDERR "Received unexpected response: $response\n"; + close($csock); + &wait_then_retry(); + next; + } + + # tell the client interface we'd like to start the data channel + print $csock "STARTDATA\n"; + + # the response should be the socket to connect the data channel to + $response = <$csock>; + chomp $response; + + my($dport) = $response; + print "Connecting data channel to port $dport on $addr...\n"; + + # attempt to connect the data channel + my($dsock) = new IO::Socket::INET( + PeerAddr => $addr, + PeerPort => $dport, + Proto => 'tcp' + ) or die "arse?"; + + # if socket isn't defined connection failed + if (!defined $dsock) { + print STDERR "ERROR: Could not connect data channel to $addr:$dport.\n"; + print STDERR "Failure in communications.\n"; + close($csock); + &wait_then_retry(); + next; + } + + # the data channel should now be sending us data! + + # call sub to process data being received over the data channel + &processdata($dsock); + + # data processing has stopped, close sockets close($csock); - exit(1); + close($dsock); + + # wait before retrying + &wait_then_retry(); } -print $csock "cpugrapher\n"; -$response = <$csock>; -if ($response && $response ne "OK\n") { - print "Received unexpected response: $response\n"; - close($csock); - exit(1); -} +# we'll never reach here... unless 1 becomes false for some reason ;) +exit 0; -print $csock "STARTDATA\n"; -$response = <$csock>; -chop $response; -print "Asked to connect to port $response on $addr, connecting...\n"; - -my($dport) = $response; - -my($dsock) = new IO::Socket::INET( - PeerAddr => $addr, - PeerPort => $dport, - Proto => 'tcp' - ) or die "Cannot connect!"; - -if (!defined $dsock) { - print "ERROR: Could not connect to $addr:$dport.\n"; - print "Failure in communications.\n"; - close($csock); - exit(1); +# +# wait for a while before retrying +# +sub wait_then_retry() { + print STDERR "Will retry connection to i-scream server in $retry_wait seconds.\n\n"; + sleep $retry_wait; } -## below here has been "improved" -## above is still a mess ;) - -while(1) { - # read data - $response = <$dsock>; +# +# Given the socket of the data channel will process all +# the incoming XML data, creating and updating the appropriate +# database files. +# +# $dsock = socket connected to the data channel +# +sub processdata() { + # the socket connected to the data channel + my($dsock) = @_; + # save us recreating this variable each time we loop + my($xml); - # attemp to parse the data - my($err, %xmlhash) = &iscream::XMLParser::parse($response); - if($err) { - print STDERR "Skipped, XML did not parse: $response"; - next; - } - - # standard data packet - if($xmlhash{"packet.attributes.type"} eq "data") { - my($machine) = $xmlhash{"packet.attributes.machine_name"}; - my($date) = $xmlhash{"packet.attributes.date"}; + while(1) { + # read data + $xml = <$dsock>; - # make directory for machine - if(! -d "$rrddir/$machine") { - # not sure on this umask, but it seems to work? - mkdir "$rrddir/$machine", 0777; + # something odd has happened + last if not defined $xml; + + # attempt to parse the data + my($err, %xmlhash) = &iscream::XMLParser::parse($xml); + if($err) { + print STDERR "Skipped, XML did not parse: $xml"; + next; } - my(@data); - - # cpu - @data = ( "packet.cpu.idle:idle:GAUGE", - "packet.cpu.user:user:GAUGE", - "packet.cpu.kernel:kernel:GAUGE", - "packet.cpu.swap:swap:GAUGE", - "packet.cpu.iowait:iowait:GAUGE", - ); - &updaterrd($machine, "cpu", $date, 15, \%xmlhash, @data); - - # mem - @data = ( "packet.memory.free:free:GAUGE", - "packet.memory.total:total:GAUGE", - ); - &updaterrd($machine, "mem", $date, 15, \%xmlhash, @data); - - # load - @data = ( "packet.load.load1:load1:GAUGE", - "packet.load.load5:load5:GAUGE", - "packet.load.load15:load15:GAUGE", - ); - &updaterrd($machine, "load", $date, 15, \%xmlhash, @data); - - # processes - @data = ( "packet.processes.cpu:cpu:GAUGE", - "packet.processes.sleeping:sleeping:GAUGE", - "packet.processes.stopped:stopped:GAUGE", - "packet.processes.total:total:GAUGE", - "packet.processes.zombie:zombie:GAUGE", - ); - &updaterrd($machine, "proc", $date, 15, \%xmlhash, @data); - - # swap - @data = ( "packet.swap.free:free:GAUGE", - "packet.swap.total:total:GAUGE", - ); - &updaterrd($machine, "swap", $date, 15, \%xmlhash, @data); - - # users - @data = ( "packet.users.count:count:GAUGE", - ); - &updaterrd($machine, "users", $date, 15, \%xmlhash, @data); - - # disk - my($i) = 0; - while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) { - my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"}; - $mount =~ s/_/$hex_underscore/g; - $mount =~ s/\//$hex_slash/g; - @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE", - "packet.disk.p$i.attributes.used:used:GAUGE", + # standard data packet + if($xmlhash{"packet.attributes.type"} eq "data") { + my($machine) = $xmlhash{"packet.attributes.machine_name"}; + my($date) = $xmlhash{"packet.attributes.date"}; + + # make directory for machine + if(! -d "$rrddir/$machine") { + # not sure on this umask, but it seems to work? + mkdir "$rrddir/$machine", 0777; + } + + my(@data); + + # cpu + @data = ( "packet.cpu.idle:idle:GAUGE", + "packet.cpu.user:user:GAUGE", + "packet.cpu.kernel:kernel:GAUGE", + "packet.cpu.swap:swap:GAUGE", + "packet.cpu.iowait:iowait:GAUGE", ); - &updaterrd($machine, "disk-$mount", $date, 15, \%xmlhash, @data); - ++$i; + &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data); + + # mem + @data = ( "packet.memory.free:free:GAUGE", + "packet.memory.total:total:GAUGE", + ); + &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data); + + # load + @data = ( "packet.load.load1:load1:GAUGE", + "packet.load.load5:load5:GAUGE", + "packet.load.load15:load15:GAUGE", + ); + &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data); + + # processes + @data = ( "packet.processes.cpu:cpu:GAUGE", + "packet.processes.sleeping:sleeping:GAUGE", + "packet.processes.stopped:stopped:GAUGE", + "packet.processes.total:total:GAUGE", + "packet.processes.zombie:zombie:GAUGE", + ); + &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data); + + # swap + @data = ( "packet.swap.free:free:GAUGE", + "packet.swap.total:total:GAUGE", + ); + &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data); + + # users + @data = ( "packet.users.count:count:GAUGE", + ); + &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data); + + # disk + my($i) = 0; + while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) { + my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"}; + $mount =~ s/_/$hex_underscore/g; + $mount =~ s/\//$hex_slash/g; + @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE", + "packet.disk.p$i.attributes.used:used:GAUGE", + ); + &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data); + ++$i; + } } - } - - # queue statistics packet - elsif($xmlhash{"packet.attributes.type"} eq "queueStat") { - # psuedo machine for internal server stuff - my($machine) = "i-scream-server"; - # make directory - if(! -d "$rrddir/$machine") { - # not sure on this umask, but it seems to work? - mkdir "$rrddir/$machine", 0777; - } - my($hash) = $xmlhash{"packet.attributes.hashCode"}; - my($date) = $xmlhash{"packet.attributes.date"}; - my($name) = $xmlhash{"packet.attributes.name"}; - # take a look to see if we have a shutdown packet... - if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") { - unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>; - next; - } - # look through to see how many internal queues we have - my($i) = 0; - while(defined $xmlhash{"packet.queue.attributes.queue$i"}) { - # see if the queue has been removed - if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") { - # delete the queues rrd - unlink "$rrddir/$machine/$hash\_$i.rrd"; - # are there any other rrd's left on this queue? if not, cleanup. - # get a list of any that may be still there.. - opendir(DIR, "$rrddir/$machine"); - my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR); - closedir DIR; - # count them (+1 because an empty array is size -1) - my($rrdcount) = $#rrdcountfiles + 1; - if($rrdcount == 0) { - # clean up the def file and any images - unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>; + + # queue statistics packet + elsif($xmlhash{"packet.attributes.type"} eq "queueStat") { + # psuedo machine for internal server stuff + my($machine) = "i-scream-server"; + # make directory + if(! -d "$rrddir/$machine") { + # not sure on this umask, but it seems to work? + mkdir "$rrddir/$machine", 0777; + } + my($hash) = $xmlhash{"packet.attributes.hashCode"}; + my($date) = $xmlhash{"packet.attributes.date"}; + my($name) = $xmlhash{"packet.attributes.name"}; + # take a look to see if we have a shutdown packet... + if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") { + unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>; + next; + } + # look through to see how many internal queues we have + my($i) = 0; + while(defined $xmlhash{"packet.queue.attributes.queue$i"}) { + # see if the queue has been removed + if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") { + # delete the queues rrd + unlink "$rrddir/$machine/$hash\_$i.rrd"; + # are there any other rrd's left on this queue? if not, cleanup. + # get a list of any that may be still there.. + opendir(DIR, "$rrddir/$machine"); + my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR); + closedir DIR; + # count them (+1 because an empty array is size -1) + my($rrdcount) = $#rrdcountfiles + 1; + if($rrdcount == 0) { + # clean up the def file and any images + unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>; + } + ++$i; + next; } + # the &updaterrd will also do this check, but we want + # to write our def file out first + if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) { + open(DEF, ">$rrddir/$machine/$hash.def"); + print DEF $name; + close DEF; + } + my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE", + "packet.queue.attributes.total:total:GAUGE", + ); + &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data); ++$i; - next; } - # the &updaterrd will also do this check, but we want - # to write our def file out first - if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) { - open(DEF, ">$rrddir/$machine/$hash.def"); - print DEF $name; - close DEF; - } - my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE", - "packet.queue.attributes.total:total:GAUGE", - ); - &updaterrd($machine, "$hash\_$i", $date, 15, \%xmlhash, @data); - ++$i; } + else { + #print STDERR "SKIPPED: valid xml, but not a data or statistics packet\n"; + } } - else { - #print "SKIPPED: valid xml, but not a data or statistics packet\n"; - } + + # we'll now return from this sub and reconnect + print STDERR "Data channel socket gave no data, bailing out...\n"; } - -# we'll never reach here... unless 1 becomes false for some reason ;) -exit 0; - # # sub to update an rrd file