--- projects/cms/source/reports/rrdgraphing/graph.pl 2002/05/18 18:15:59 1.2 +++ projects/cms/source/reports/rrdgraphing/graph.pl 2005/03/31 20:49:33 1.16 @@ -2,6 +2,7 @@ # # i-scream central monitoring system +# http://www.i-scream.org # Copyright (C) 2000-2002 i-scream # # This program is free software; you can redistribute it and/or @@ -21,43 +22,89 @@ # ----------------------------------------------------------- # i-scream graph generation script -# http://www.i-scream.org.uk +# http://www.i-scream.org # # Generates graphs from rrd databases for i-scream data. # # $Author: tdb $ -# $Id: graph.pl,v 1.2 2002/05/18 18:15:59 tdb Exp $ +# $Id: graph.pl,v 1.16 2005/03/31 20:49:33 tdb Exp $ #------------------------------------------------------------ ## TODO # possibly make more configurable? # -- allow configurable periods of graphs # -- comments, types, etc -# -- move all to external config file +my($version) = '$Id: graph.pl,v 1.16 2005/03/31 20:49:33 tdb Exp $'; + $| = 1; + use strict; +use Getopt::Std; use RRDs; -# Base directory for images -# (a directory will be constructed for each host under this) -my($imgdir) = "/home/pkg/iscream/public_html/graphs"; +# define variables that will be read from the config +# nb. keep this insync with the config file! 
+use vars qw{ + $imgdir $rrddir + $maxrrdage $maximgage $deleterrds $deleteimgs + $hex_slash $hex_underscore $hex_space $hex_colon $hex_bslash + $rrdstep $retry_wait + $verbose $quiet +}; -# Location of RRD databases -my($rrddir) = "/u1/i-scream/databases"; +# default location of the config file +my($configfile) = "rrdgraphing.conf"; -# / converted to a decimal then hex'd -my($hex_slash) = "_2f"; -# _ converted to a decimal then hex'd -my($hex_underscore) = "_5f"; - +# check for command line arguments +my(%opts); +my($ret) = getopts('hvqVc:', \%opts); + +# if invalid argument given, $ret will not be 1 +&usage() if $ret != 1; + +# first process the arguments which might mean we exit now + +# -h is usage +if($opts{h}) { + &usage(); +} +# -V is version +if($opts{V}) { + print "graph.pl version: $version\n"; + exit(1); +} + +# Then try getting the config + +# -c specifies the config file location +if($opts{c}) { + $configfile = $opts{c}; +} +# suck in the config (NOTE(review): $verbose is still unset here, so this log line never prints; the result of 'do' is also unchecked, so a missing or broken config goes unreported) +&log("reading config from $configfile\n"); +do $configfile; + +# Then any options we might want to override the config with + +# -v is verbose +if($opts{v}) { + $verbose = $opts{v}; +} +# -q is quiet +if($opts{q}) { + $quiet = $opts{q}; + # if we're meant to be quiet, we can hardly be verbose! + $verbose = 0; +} + # Read the contents of the base directory # and pull out the list of subdirectories (except . and .. 
:) opendir(DIR, $rrddir); -my(@rrddirlist) = grep { -d "$rrddir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); +my(@rrddirlist) = sort grep { -d "$rrddir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); closedir DIR; -# look through each directoty, as they might +# look through each directory, as they might # contain rrds for a particular machine foreach my $machine (@rrddirlist) { # Read the contents of the directory @@ -68,79 +115,182 @@ foreach my $machine (@rrddirlist) { # See what rrd we have, and generate the graphs accordingly foreach my $rrd (@rrdlist) { chomp $rrd; + # stat the file + my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime, + $ctime,$blksize,$blocks) = stat("$rrddir/$machine/$rrd"); + # check if it's old enough to be deleted + if((time - $mtime) > $maxrrdage) { + # do we delete the rrd, or just ignore it? + if($deleterrds) { + # if so, delete it + unlink("$rrddir/$machine/$rrd"); + &log("deleted old rrd $rrddir/$machine/$rrd\n"); + } + else { + &log("ignored old rrd $rrddir/$machine/$rrd\n"); + } + # no more processing required for this rrd + next; + } if($rrd =~ /^(cpu)\.rrd$/) { my(@data); my(@rawdata); - push @data, "LINE2:$1:idle:idle#00FF00:idle cpu"; - push @data, "LINE2:$1:user:user#0000FF:user cpu"; - push @data, "LINE2:$1:kernel:kernel#00FFFF:kernel cpu"; - push @data, "LINE2:$1:swap:swap#FF00FF:swap cpu"; - push @data, "LINE2:$1:iowait:iowait#FF0000:iowait cpu"; + push @data, "AREA:$1:swap:swap#FF00FF:OK:swap cpu "; + push @data, "STACK:$1:iowait:iowait#FF0000:OK:iowait cpu"; + push @data, "STACK:$1:kernel:kernel#00FFFF:OK:kernel cpu"; + push @data, "STACK:$1:user:user#0000FF:OK:user cpu "; + push @data, "LINE2:$1:idle:idle#00FF00:OK:idle cpu "; push @rawdata, "--upper-limit=100"; - &makegraph($machine, $1, "CPU Usage for $machine", \@data, \@rawdata); + &makegraph($machine, $1, "CPU Usage for $machine", "% cpu time", \@data, \@rawdata); } + if($rrd =~ /^(uptime)\.rrd$/) { + my(@data); + push @data, 
"LINE2:$1:uptime:uptime#0000FF:OK:uptime"; + &makegraph($machine, $1, "Uptime for $machine", "uptime in seconds", \@data); + } if($rrd =~ /^(mem)\.rrd$/) { my(@data); my(@rawdata); - # we don't actually want to display free memory, - # although we need it to do inuse... - push @data, "NONE:$1:free:free#CCCCFF:free memory"; - push @data, "LINE2:$1:total:total#0000FF:total memory"; - # calculate inuse - push @rawdata, "CDEF:inuse=total,free,-"; + # we don't actually want to display free or total memory, + # although we need it to work out peruse... + push @data, "NONE:$1:free:free#CCCCFF:NONE:free memory"; + push @data, "NONE:$1:total:total#0000FF:NONE:total memory\\n"; + push @data, "NONE:$1:cache:cache#0000FF:NONE:cache memory\\n"; + # calculate peruse - note that we only use 'free' if it's + # less or equal to total (this is to avoid negative percentages :) + push @rawdata, "CDEF:peruse=total,free,total,LE,free,0,IF,-,total,/,100,*"; + push @rawdata, "CDEF:percacuse=cache,total,LE,cache,0,IF,total,/,100,*"; # and add it to the graph - push @rawdata, "AREA:inuse#CCCCFF:memory in use"; + push @rawdata, "AREA:peruse#CCCCFF:% memory in use"; + &addlegend(\@rawdata, "peruse"); + push @rawdata, "LINE2:percacuse#0000FF:% memory cache "; + &addlegend(\@rawdata, "percacuse"); + push @rawdata, "--upper-limit=100"; push @rawdata, "--base=1024"; - &makegraph($machine, $1, "Memory Usage for $machine", \@data, \@rawdata); + # put the total memory on the graph so we can map percentages to real values + push @rawdata, "GPRINT:total:LAST:Current total memory\\: \%.2lf %sb\\c"; + &makegraph($machine, $1, "Memory Usage for $machine", "% memory in use", \@data, \@rawdata); } if($rrd =~ /^(load)\.rrd$/) { my(@data); - push @data, "LINE2:$1:load1:load1#CCCCFF:1 minute load average"; - push @data, "LINE2:$1:load5:load5#7777FF:5 minute load average"; - push @data, "LINE2:$1:load15:load15#0000FF:15 minute load average"; - &makegraph($machine, $1, "Loads for $machine", \@data); + push 
@data, "LINE2:$1:load1:load1#CCCCFF:OK: 1 min load average"; + push @data, "LINE2:$1:load5:load5#7777FF:OK: 5 min load average"; + push @data, "LINE2:$1:load15:load15#0000FF:OK:15 min load average"; + &makegraph($machine, $1, "Loads for $machine", "load average", \@data); } if($rrd =~ /^(proc)\.rrd$/) { my(@data); - push @data, "LINE2:$1:cpu:cpu#00FF00:cpu processes"; - push @data, "LINE2:$1:sleeping:sleeping#0000FF:sleeping processes"; - push @data, "LINE2:$1:stopped:stopped#00FFFF:stopped processes"; - push @data, "LINE2:$1:total:total#FF00FF:total processes"; - push @data, "LINE2:$1:zombie:zombie#FF0000:zombie processes"; - &makegraph($machine, $1, "Processes on $machine", \@data); + push @data, "AREA:$1:stopped:stopped#00FFFF:OK:stopped processes "; + push @data, "STACK:$1:zombie:zombie#FF0000:OK:zombie processes "; + push @data, "STACK:$1:cpu:cpu#00FF00:OK:cpu processes "; + push @data, "STACK:$1:sleeping:sleeping#0000FF:OK:sleeping processes"; + push @data, "LINE2:$1:total:total#FF00FF:OK:total processes "; + &makegraph($machine, $1, "Processes on $machine", "no. of processes", \@data); } if($rrd =~ /^(swap)\.rrd$/) { my(@data); my(@rawdata); - # we don't actually want to display free swap, - # although we need it to do inuse... - push @data, "NONE:$1:free:free#CCCCFF:free swap"; - push @data, "LINE2:$1:total:total#0000FF:total swap"; - # calculate inuse - push @rawdata, "CDEF:inuse=total,free,-"; + # we don't actually want to display free or total swap, + # although we need it to work out peruse... 
+ push @data, "NONE:$1:free:free#CCCCFF:NONE:free swap"; + push @data, "NONE:$1:total:total#0000FF:NONE:total swap\\n"; + # calculate peruse - note that we only use 'free' if it's + # less or equal to total (this is to avoid negative percentages :) + push @rawdata, "CDEF:peruse=total,free,total,LE,free,0,IF,-,total,/,100,*"; # and add it to the graph - push @rawdata, "AREA:inuse#CCCCFF:swap in use"; + push @rawdata, "AREA:peruse#CCCCFF:% swap in use"; + push @rawdata, "--upper-limit=100"; push @rawdata, "--base=1024"; - &makegraph($machine, $1, "Swap Usage for $machine", \@data, \@rawdata); + # add some nice values to the legend + &addlegend(\@rawdata, "peruse"); + # put the total swap on the graph so we can map percentages to real values + push @rawdata, "GPRINT:total:LAST:Current total swap\\: \%.2lf %sb\\c"; + &makegraph($machine, $1, "Swap Usage for $machine", "% swap in use", \@data, \@rawdata); } if($rrd =~ /^(users)\.rrd$/) { my(@data); - push @data, "AREA:$1:count:count#CCCCFF:user count"; - &makegraph($machine, $1, "User Count for $machine", \@data); + push @data, "AREA:$1:count:count#CCCCFF:OK:user count"; + &makegraph($machine, $1, "User Count for $machine", "no. 
of users", \@data); } - if($rrd =~ /^(disk)-(\S+).rrd$/) { + if($rrd =~ /^(paging)\.rrd$/) { my(@data); + push @data, "AREA:$1:pageins:pageins#00FF00:OK:pages paged in "; + push @data, "LINE2:$1:pageouts:pageouts#0000FF:OK:pages paged out"; + &makegraph($machine, $1, "Paging on $machine", "pages per second", \@data); + } + if($rrd =~ /^(disk)-(\S+)\.rrd$/) { + my(@data); my(@rawdata); - push @data, "LINE2:$1-$2:kbytes:kbytes#0000FF:total size"; - push @data, "AREA:$1-$2:used:used#CCCCFF:used"; + # we need this lot for our calculations, but we'll never show them + push @data, "NONE:$1-$2:total:total#0000FF:NONE:total size\\n"; + push @data, "NONE:$1-$2:used:used#CCCCFF:NONE:used space"; + push @data, "NONE:$1-$2:totalinodes:totalinodes#000000:NONE:total inodes"; + push @data, "NONE:$1-$2:freeinodes:freeinodes#000000:NONE:free inodes"; + # calculate peruse, add it to the graph, and add a legend + push @rawdata, "CDEF:peruse=used,total,/,100,*"; + push @rawdata, "AREA:peruse#CCCCFF:% disk used "; + &addlegend(\@rawdata, "peruse"); + # put the total space on the graph so we can map percentages to real values + push @rawdata, "GPRINT:total:LAST:Current total space\\: \%.2lf %sb\\c"; + # calculate perinodeuse, add it to the graph, and add a legend + push @rawdata, "CDEF:perinodeuse=totalinodes,freeinodes,totalinodes,LT,freeinodes,0,IF,-,totalinodes,/,100,*"; + push @rawdata, "LINE2:perinodeuse#FF4444:% inodes used"; + push @rawdata, "--upper-limit=100"; push @rawdata, "--base=1024"; + &addlegend(\@rawdata, "perinodeuse"); + # put the total inodes on the graph so we can map percentages to real values + push @rawdata, "GPRINT:totalinodes:LAST:Current total inodes\\: \%.2lf %s\\c"; + # some name tidying my($type) = $1; my($name) = $2; my($nicename) = $2; $nicename =~ s/$hex_slash/\//g; $nicename =~ s/$hex_underscore/_/g; - &makegraph($machine, "$type-$name", "Disk Usage for $machine on $nicename", \@data, \@rawdata); + $nicename =~ s/$hex_bslash/\\/g; + $nicename =~ 
s/$hex_space/ /g; + $nicename =~ s/$hex_colon/:/g; + &makegraph($machine, "$type-$name", "Disk Usage for $machine on $nicename", "% usage", \@data, \@rawdata); } + if($rrd =~ /^(diskio)-(\S+)\.rrd$/) { + my(@data); + my(@rawdata); + push @data, "AREA:$1-$2:rbytes:rbytes#00FF00:OK:read bytes "; + push @data, "LINE2:$1-$2:wbytes:wbytes#0000FF:OK:write bytes"; + push @rawdata, "--base=1024"; + # some name tidying + my($type) = $1; + my($name) = $2; + my($nicename) = $2; + $nicename =~ s/$hex_slash/\//g; + $nicename =~ s/$hex_underscore/_/g; + $nicename =~ s/$hex_bslash/\\/g; + $nicename =~ s/$hex_space/ /g; + $nicename =~ s/$hex_colon/:/g; + &makegraph($machine, "$type-$name", "Disk IO for $machine on $nicename", "bytes per second", \@data, \@rawdata); + } + if($rrd =~ /^(net)-(\S+)\.rrd$/) { + my(@data); + my(@rawdata); + push @data, "AREA:$1-$2:rx:rx#00FF00:OK:received bytes "; + push @data, "LINE2:$1-$2:tx:tx#0000FF:OK:transfered bytes"; + push @rawdata, "--base=1024"; + # some name tidying + my($type) = $1; + my($name) = $2; + my($nicename) = $2; + $nicename =~ s/$hex_slash/\//g; + $nicename =~ s/$hex_underscore/_/g; + $nicename =~ s/$hex_bslash/\\/g; + $nicename =~ s/$hex_space/ /g; + $nicename =~ s/$hex_colon/:/g; + &makegraph($machine, "$type-$name", "Network IO for $machine on $nicename", "bytes per second", \@data, \@rawdata); + } + if($rrd =~ /^(mailq)-(\S+)\.rrd$/) { + my(@data); + my(@rawdata); + push @data, "LINE2:$1-$2:size:size#0000FF:OK:messages"; + &makegraph($machine, "$1-$2", "Mail Queue ($2) Size for $machine", "messages in queue", \@data, \@rawdata); + } # probably a queue with a name like this :) if($rrd =~ /^(\d+)_0\.rrd$/) { my(@data); @@ -148,11 +298,10 @@ foreach my $machine (@rrddirlist) { my($baserrd) = $1; my($i) = 0; while( -f "$rrddir/$machine/$baserrd\_$i.rrd" ) { - push @data, "LINE2:$baserrd\_$i:size:size$i" . &get_colour($i) . ":queue$i size "; + push @data, "LINE2:$baserrd\_$i:size:size$i" . &get_colour($i) . 
":OK:queue$i size "; ++$i; } - push @data, "LINE2:$baserrd\_0:total:total#FF0000:packets/sec - currently"; - push @rawdata, "GPRINT:total:LAST:%lf %spackets/sec"; + push @data, "LINE2:$baserrd\_0:total:total#FF0000:OK:packets/sec "; my($comment); if(-f "$rrddir/$machine/$baserrd.def") { open(DEF, "$rrddir/$machine/$baserrd.def"); @@ -160,11 +309,66 @@ foreach my $machine (@rrddirlist) { chomp $comment if defined $comment; } $comment = "unknown queue" if not defined $comment; - &makegraph($machine, $baserrd, $comment, \@data, \@rawdata); + &makegraph($machine, $baserrd, $comment, "", \@data, \@rawdata); } } + # have a last check, maybe we can remove the directory now? + # (only if we're deleting stuff) + if($deleterrds) { + # Read the contents of the directory + opendir(DIR, "$rrddir/$machine"); + my(@dirlist) = grep { !/^\.$/ && !/^\.\.$/ } readdir(DIR); + closedir DIR; + if($#dirlist == -1) { + rmdir "$rrddir/$machine"; + &log("deleting empty rrd directory $rrddir/$machine\n"); + } + } } +if($deleteimgs) { + # Read the contents of the graphs directory + # and pull out the list of subdirectories (except . and .. 
:) + opendir(DIR, $imgdir); + my(@imgdirlist) = sort grep { -d "$imgdir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); + closedir DIR; + + # look through each directory, as they might + # contain images for a particular machine + foreach my $machine (@imgdirlist) { + # Read the contents of the directory + opendir(DIR, "$imgdir/$machine"); + my(@imglist) = grep { /\.png$/ && -f "$imgdir/$machine/$_" } readdir(DIR); + closedir DIR; + + # See what images we have, and delete any that are too old + foreach my $img (@imglist) { + chomp $img; + # stat the img + my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime, + $ctime,$blksize,$blocks) = stat("$imgdir/$machine/$img"); + # check if it's old enough to be deleted + if((time - $mtime) > $maximgage) { + # if so, delete it + unlink("$imgdir/$machine/$img"); + &log("deleted old image $imgdir/$machine/$img\n"); + } + } + # have a last check, maybe we can remove the directory now? + # Read the contents of the directory + opendir(DIR, "$imgdir/$machine"); + my(@dirlist) = grep { !/^\.$/ && !/^\.\.$/ } readdir(DIR); + closedir DIR; + if($#dirlist == -1) { + rmdir "$imgdir/$machine"; + &log("deleted empty image directory $imgdir/$machine\n"); + } + } +} + +exit(0); + + # # subroutine to make some graphs # @@ -174,15 +378,18 @@ foreach my $machine (@rrddirlist) { # (eg. cpu) # $title = the title for the graph # (eg. kernow CPU usage) +# $vlabel = the vertical label to apply to the left side of the graph +# (eg. 
kb/s) # $dataref = a reference to an array containing information for the graph -# elements of format: "gtype:rrdname:dsname:name#colour:comment with spaces" +# elements of format: "gtype:rrdname:dsname:name#colour:legend:comment with spaces" # (if gtype is "NONE" only a DEF of 'name' will be defined, no line will be plotted) +# (if legend is "NONE" the latest/average/max/min legend won't be printed) # $rawcmdref = a reference to an array containing raw rrd commands # elements a single command each, no spaces # sub makegraph() { - my($machine, $type, $title, $dataref, $rawcmdref) = @_; + my($machine, $type, $title, $vlabel, $dataref, $rawcmdref) = @_; # pass in these arrays by reference my(@data) = @$dataref if defined $dataref; my(@rawcmd) = @$rawcmdref if defined $rawcmdref; @@ -190,43 +397,64 @@ sub makegraph() { if(! -d "$imgdir/$machine") { # not sure on this umask, but it seems to work? mkdir "$imgdir/$machine", 0777; + &log("created directory $imgdir/$machine\n"); } my(@rrdcmd); foreach my $dataitem (@data) { - # dataitem should be: "gtype:rrdname:dsname:name#colour:comment with spaces" + # dataitem should be: "gtype:rrdname:dsname:name#colour:legend:comment with spaces" # (if gtype is "NONE" only a DEF of 'name' will be defined, no line will be plotted) - if($dataitem =~ /^(\S+):(\S+):(\S+):(\S+)#(.{6}):(.*)$/) { + # (if legend is "NONE" the latest/average/max/min legend won't be printed) + if($dataitem =~ /^(\S+):(\S+):(\S+):(\S+)#(.{6}):(\S+):(.*)$/) { push @rrdcmd, "DEF:$4=$rrddir/$machine/$2.rrd:$3:AVERAGE"; if($1 ne "NONE") { - push @rrdcmd, "$1:$4#$5:$6"; + push @rrdcmd, "$1:$4#$5:$7"; + if($6 ne "NONE") { + # add some nice values to the legend + &addlegend(\@rrdcmd, $4); + } } } } push @rrdcmd, "--title=$title"; push @rrdcmd, "--imgformat=PNG"; push @rrdcmd, "--lower-limit=0"; + push @rrdcmd, "--vertical-label=$vlabel"; # not entirely convinced this is good... 
push @rrdcmd, "--alt-autoscale-max"; # add any further raw commands push @rrdcmd, @rawcmd; RRDs::graph ("$imgdir/$machine/$type-3h.png", "--start=-10800", @rrdcmd); my($err_3h) = RRDs::error; - print STDERR "Error generating 3h graph for $machine/$type: $err_3h\n" if $err_3h; + &log("created $imgdir/$machine/$type-3h.png\n") unless $err_3h; + &error("Error generating 3h graph for $machine/$type: $err_3h\n") if $err_3h; RRDs::graph ("$imgdir/$machine/$type-1d.png", "--start=-86400", @rrdcmd); my($err_1d) = RRDs::error; - print STDERR "Error generating 1d graph for $machine/$type: $err_1d\n" if $err_1d; + &log("created $imgdir/$machine/$type-1d.png\n") unless $err_1d; + &error("Error generating 1d graph for $machine/$type: $err_1d\n") if $err_1d; RRDs::graph ("$imgdir/$machine/$type-1w.png", "--start=-604800", @rrdcmd); my($err_1w) = RRDs::error; - print STDERR "Error generating 1w graph for $machine/$type: $err_1w\n" if $err_1w; + &log("created $imgdir/$machine/$type-1w.png\n") unless $err_1w; + &error("Error generating 1w graph for $machine/$type: $err_1w\n") if $err_1w; RRDs::graph ("$imgdir/$machine/$type-1m.png", "--start=-2678400", @rrdcmd); my($err_1m) = RRDs::error; - print STDERR "Error generating 1m graph for $machine/$type: $err_1m\n" if $err_1m; + &log("created $imgdir/$machine/$type-1m.png\n") unless $err_1m; + &error("Error generating 1m graph for $machine/$type: $err_1m\n") if $err_1m; RRDs::graph ("$imgdir/$machine/$type-1y.png", "--start=-31536000", @rrdcmd); my($err_1y) = RRDs::error; - print STDERR "Error generating 1y graph for $machine/$type: $err_1y\n" if $err_1y; + &log("created $imgdir/$machine/$type-1y.png\n") unless $err_1y; + &error("Error generating 1y graph for $machine/$type: $err_1y\n") if $err_1y; return; } +# subroutine to add a legend +# accepts reference to an array and a name +sub addlegend() { + my($dataref, $name) = @_; + push @$dataref, "GPRINT:$name:LAST:Current\\: \%8.2lf %s"; + push @$dataref, "GPRINT:$name:AVERAGE:Average\\: 
\%8.2lf %s"; + push @$dataref, "GPRINT:$name:MAX:Max\\: \%8.2lf %s\\n"; +} + # hacky subroutine to return a colour # could be done much better somehow :/ sub get_colour { @@ -249,4 +477,29 @@ sub get_colour { else { return "#000066"; } +} + +# prints out usage information then exits +sub usage() { + print "Usage: graph.pl [options]\n"; + print "Options\n"; + print " -c config Specifies the configuration file\n"; + print " default: rrdgraphing.conf\n"; + print " -v Be verbose about what's happening\n"; + print " -q Be quiet, even supress errors\n"; + print " -V Print version number\n"; + print " -h Prints this help page\n"; + exit(1); +} + +# prints a log message if verbose is turned on +sub log() { + my($msg) = @_; + print $msg if $verbose; +} + +# prints an error message unless quiet is turned on +sub error() { + my($msg) = @_; + print STDERR $msg unless $quiet; }