--- projects/cms/source/reports/rrdgraphing/graph.pl 2002/05/21 11:37:56 1.4 +++ projects/cms/source/reports/rrdgraphing/graph.pl 2006/11/23 16:37:44 1.20 @@ -2,6 +2,7 @@ # # i-scream central monitoring system +# http://www.i-scream.org # Copyright (C) 2000-2002 i-scream # # This program is free software; you can redistribute it and/or @@ -21,58 +22,90 @@ # ----------------------------------------------------------- # i-scream graph generation script -# http://www.i-scream.org.uk +# http://www.i-scream.org # # Generates graphs from rrd databases for i-scream data. # # $Author: tdb $ -# $Id: graph.pl,v 1.4 2002/05/21 11:37:56 tdb Exp $ +# $Id: graph.pl,v 1.20 2006/11/23 16:37:44 tdb Exp $ #------------------------------------------------------------ ## TODO # possibly make more configurable? # -- allow configurable periods of graphs # -- comments, types, etc -# -- move all to external config file +my($version) = '$Id: graph.pl,v 1.20 2006/11/23 16:37:44 tdb Exp $'; + $| = 1; + use strict; +use Getopt::Std; use RRDs; -# Base directory for images -# (a directory will be constructed for each host under this) -my($imgdir) = "/home/pkg/iscream/public_html/graphs"; +# define variables that will be read from the config +# nb. keep this insync with the config file! +use vars qw{ + $imgdir $imgwidth $imgheight $rrddir + $maxrrdage $maximgage $deleterrds $deleteimgs + $hex_slash $hex_underscore $hex_space $hex_colon $hex_bslash $hex_rbracket + $hex_lbracket $hex_plus $hex_hash + $rrdstep $retry_wait + $verbose $quiet +}; -# Location of RRD databases -my($rrddir) = "/u1/i-scream/databases"; +# default locate of the config file +my($configfile) = "rrdgraphing.conf"; -# / converted to a decimal then hex'd -my($hex_slash) = "_2f"; -# _ converted to a decimal then hex'd -my($hex_underscore) = "_5f"; +# check for command line arguments +my(%opts); +my($ret) = getopts('hvqVc:', \%opts); -# maximum age (last modified) before an rrd or graph get cleaned up -# (in seconds) -my($maxrrdage) = 3600; # 1 hour -my($maximgage) = 3600; # 1 hour +# if invalid argument given, $ret will not be 1 +&usage() if $ret != 1; -# delete rrd's when they get cleaned up? -# if unset, will just ignore the rrd's -# - usually best to leave this off, we don't want to delete useful rrds :) -my($deleterrds) = 0; +# first process the arguments which might mean we exit now -# delete graphs when they get cleaned up? -# if unset, won't bother checking at all -# - usually best to leave this on -my($deleteimgs) = 1; - +# -h is usage +if($opts{h}) { + &usage(); +} +# -V is version +if($opts{V}) { + print "graph.pl version: $version\n"; + exit(1); +} + +# Then try getting the config + +# -c specifies the config file location +if($opts{c}) { + $configfile = $opts{c}; +} +# suck in the config +&log("reading config from $configfile\n"); +do $configfile; + +# Then any options we might want to override the config with + +# -v is verbose +if($opts{v}) { + $verbose = $opts{v}; +} +# -q is verbose +if($opts{q}) { + $quiet = $opts{q}; + # if we're meant to be quiet, we can hardly be verbose! + $verbose = 0; +} + # Read the contents of the base directory # and pull out the list of subdirectories (except . and .. :) opendir(DIR, $rrddir); -my(@rrddirlist) = grep { -d "$rrddir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); +my(@rrddirlist) = sort grep { -d "$rrddir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); closedir DIR; -# look through each directoty, as they might +# look through each directory, as they might # contain rrds for a particular machine foreach my $machine (@rrddirlist) { # Read the contents of the directory @@ -92,83 +125,177 @@ foreach my $machine (@rrddirlist) { if($deleterrds) { # if so, delete it unlink("$rrddir/$machine/$rrd"); + &log("deleted old rrd $rrddir/$machine/$rrd\n"); } + else { + &log("ignored old rrd $rrddir/$machine/$rrd\n"); + } # no more processing required for this rrd next; } if($rrd =~ /^(cpu)\.rrd$/) { my(@data); my(@rawdata); - push @data, "LINE2:$1:idle:idle#00FF00:idle cpu"; - push @data, "LINE2:$1:user:user#0000FF:user cpu"; - push @data, "LINE2:$1:kernel:kernel#00FFFF:kernel cpu"; - push @data, "LINE2:$1:swap:swap#FF00FF:swap cpu"; - push @data, "LINE2:$1:iowait:iowait#FF0000:iowait cpu"; + push @data, "AREA:$1:swap:swap#FF00FF:OK:swap cpu "; + push @data, "STACK:$1:iowait:iowait#FF0000:OK:iowait cpu"; + push @data, "STACK:$1:kernel:kernel#00FFFF:OK:kernel cpu"; + push @data, "STACK:$1:user:user#0000FF:OK:user cpu "; + push @data, "LINE2:$1:idle:idle#00FF00:OK:idle cpu "; push @rawdata, "--upper-limit=100"; - &makegraph($machine, $1, "CPU Usage for $machine", \@data, \@rawdata); + &makegraph($machine, $1, "CPU Usage for $machine", "% cpu time", \@data, \@rawdata); } + if($rrd =~ /^(uptime)\.rrd$/) { + my(@data); + push @data, "LINE2:$1:uptime:uptime#0000FF:OK:uptime"; + &makegraph($machine, $1, "Uptime for $machine", "uptime in seconds", \@data); + } if($rrd =~ /^(mem)\.rrd$/) { my(@data); my(@rawdata); - # we don't actually want to display free memory, - # although we need it to do inuse... - push @data, "NONE:$1:free:free#CCCCFF:free memory"; - push @data, "LINE2:$1:total:total#0000FF:total memory"; - # calculate inuse - push @rawdata, "CDEF:inuse=total,free,-"; + # we don't actually want to display free or total memory, + # although we need it to work out peruse... + push @data, "NONE:$1:free:free#CCCCFF:NONE:free memory"; + push @data, "NONE:$1:total:total#0000FF:NONE:total memory\\n"; + push @data, "NONE:$1:cache:cache#0000FF:NONE:cache memory\\n"; + # calculate peruse - note that we only use 'free' if it's + # less or equal to total (this is to avoid negative percentages :) + push @rawdata, "CDEF:peruse=total,free,total,LE,free,0,IF,-,total,/,100,*"; + push @rawdata, "CDEF:percacuse=cache,total,LE,cache,0,IF,total,/,100,*"; # and add it to the graph - push @rawdata, "AREA:inuse#CCCCFF:memory in use"; + push @rawdata, "AREA:peruse#CCCCFF:% memory in use"; + &addlegend(\@rawdata, "peruse"); + push @rawdata, "LINE2:percacuse#0000FF:% memory cache "; + &addlegend(\@rawdata, "percacuse"); + push @rawdata, "--upper-limit=100"; push @rawdata, "--base=1024"; - &makegraph($machine, $1, "Memory Usage for $machine", \@data, \@rawdata); + # put the total memory on the graph so we can map percentages to real values + push @rawdata, "GPRINT:total:LAST:Current total memory\\: \%.2lf %sb\\c"; + &makegraph($machine, $1, "Memory Usage for $machine", "% memory in use", \@data, \@rawdata); } if($rrd =~ /^(load)\.rrd$/) { my(@data); - push @data, "LINE2:$1:load1:load1#CCCCFF:1 minute load average"; - push @data, "LINE2:$1:load5:load5#7777FF:5 minute load average"; - push @data, "LINE2:$1:load15:load15#0000FF:15 minute load average"; - &makegraph($machine, $1, "Loads for $machine", \@data); + push @data, "LINE2:$1:load1:load1#CCCCFF:OK: 1 min load average"; + push @data, "LINE2:$1:load5:load5#7777FF:OK: 5 min load average"; + push @data, "LINE2:$1:load15:load15#0000FF:OK:15 min load average"; + &makegraph($machine, $1, "Loads for $machine", "load average", \@data); } if($rrd =~ /^(proc)\.rrd$/) { my(@data); - push @data, "LINE2:$1:cpu:cpu#00FF00:cpu processes"; - push @data, "LINE2:$1:sleeping:sleeping#0000FF:sleeping processes"; - push @data, "LINE2:$1:stopped:stopped#00FFFF:stopped processes"; - push @data, "LINE2:$1:total:total#FF00FF:total processes"; - push @data, "LINE2:$1:zombie:zombie#FF0000:zombie processes"; - &makegraph($machine, $1, "Processes on $machine", \@data); + push @data, "AREA:$1:stopped:stopped#00FFFF:OK:stopped processes "; + push @data, "STACK:$1:zombie:zombie#FF0000:OK:zombie processes "; + push @data, "STACK:$1:cpu:cpu#00FF00:OK:cpu processes "; + push @data, "STACK:$1:sleeping:sleeping#0000FF:OK:sleeping processes"; + push @data, "LINE2:$1:total:total#FF00FF:OK:total processes "; + &makegraph($machine, $1, "Processes on $machine", "no. of processes", \@data); } if($rrd =~ /^(swap)\.rrd$/) { my(@data); my(@rawdata); - # we don't actually want to display free swap, - # although we need it to do inuse... - push @data, "NONE:$1:free:free#CCCCFF:free swap"; - push @data, "LINE2:$1:total:total#0000FF:total swap"; - # calculate inuse - push @rawdata, "CDEF:inuse=total,free,-"; + # we don't actually want to display free or total swap, + # although we need it to work out peruse... + push @data, "NONE:$1:free:free#CCCCFF:NONE:free swap"; + push @data, "NONE:$1:total:total#0000FF:NONE:total swap\\n"; + # calculate peruse - note that we only use 'free' if it's + # less or equal to total (this is to avoid negative percentages :) + push @rawdata, "CDEF:peruse=total,free,total,LE,free,0,IF,-,total,/,100,*"; # and add it to the graph - push @rawdata, "AREA:inuse#CCCCFF:swap in use"; + push @rawdata, "AREA:peruse#CCCCFF:% swap in use"; + push @rawdata, "--upper-limit=100"; push @rawdata, "--base=1024"; - &makegraph($machine, $1, "Swap Usage for $machine", \@data, \@rawdata); + # add some nice values to the legend + &addlegend(\@rawdata, "peruse"); + # put the total swap on the graph so we can map percentages to real values + push @rawdata, "GPRINT:total:LAST:Current total swap\\: \%.2lf %sb\\c"; + &makegraph($machine, $1, "Swap Usage for $machine", "% swap in use", \@data, \@rawdata); } if($rrd =~ /^(users)\.rrd$/) { my(@data); - push @data, "AREA:$1:count:count#CCCCFF:user count"; - &makegraph($machine, $1, "User Count for $machine", \@data); + push @data, "AREA:$1:count:count#CCCCFF:OK:user count"; + &makegraph($machine, $1, "User Count for $machine", "no. of users", \@data); } - if($rrd =~ /^(disk)-(\S+).rrd$/) { + if($rrd =~ /^(paging)\.rrd$/) { my(@data); + push @data, "AREA:$1:pageins:pageins#00FF00:OK:pages paged in "; + push @data, "LINE2:$1:pageouts:pageouts#0000FF:OK:pages paged out"; + &makegraph($machine, $1, "Paging on $machine", "pages per second", \@data); + } + if($rrd =~ /^(disk)-(\S+)\.rrd$/) { + my(@data); my(@rawdata); - push @data, "LINE2:$1-$2:kbytes:kbytes#0000FF:total size"; - push @data, "AREA:$1-$2:used:used#CCCCFF:used"; + # we need this lot for our calculations, but we'll never show them + push @data, "NONE:$1-$2:total:total#0000FF:NONE:total size\\n"; + push @data, "NONE:$1-$2:used:used#CCCCFF:NONE:used space"; + push @data, "NONE:$1-$2:totalinodes:totalinodes#000000:NONE:total inodes"; + push @data, "NONE:$1-$2:freeinodes:freeinodes#000000:NONE:free inodes"; + # calculate peruse, add it to the graph, and add a legend + push @rawdata, "CDEF:peruse=used,total,/,100,*"; + push @rawdata, "AREA:peruse#CCCCFF:% disk used "; + &addlegend(\@rawdata, "peruse"); + # put the total space on the graph so we can map percentages to real values + push @rawdata, "GPRINT:total:LAST:Current total space\\: \%.2lf %sb\\c"; + # calculate perinodeuse, add it to the graph, and add a legend + push @rawdata, "CDEF:perinodeuse=totalinodes,freeinodes,totalinodes,LT,freeinodes,0,IF,-,totalinodes,/,100,*"; + push @rawdata, "LINE2:perinodeuse#FF4444:% inodes used"; + push @rawdata, "--upper-limit=100"; push @rawdata, "--base=1024"; + &addlegend(\@rawdata, "perinodeuse"); + # put the total inodes on the graph so we can map percentages to real values + push @rawdata, "GPRINT:totalinodes:LAST:Current total inodes\\: \%.2lf %s\\c"; + # some name tidting my($type) = $1; my($name) = $2; my($nicename) = $2; $nicename =~ s/$hex_slash/\//g; $nicename =~ s/$hex_underscore/_/g; - &makegraph($machine, "$type-$name", "Disk Usage for $machine on $nicename", \@data, \@rawdata); + $nicename =~ s/$hex_bslash/\\/g; + $nicename =~ s/$hex_space/ /g; + $nicename =~ s/$hex_colon/:/g; + &makegraph($machine, "$type-$name", "Disk Usage for $machine on $nicename", "% usage", \@data, \@rawdata); } + if($rrd =~ /^(diskio)-(\S+)\.rrd$/) { + my(@data); + my(@rawdata); + push @data, "AREA:$1-$2:rbytes:rbytes#00FF00:OK:read bytes "; + push @data, "LINE2:$1-$2:wbytes:wbytes#0000FF:OK:write bytes"; + push @rawdata, "--base=1024"; + # some name tidying + my($type) = $1; + my($name) = $2; + my($nicename) = $2; + $nicename =~ s/$hex_slash/\//g; + $nicename =~ s/$hex_underscore/_/g; + $nicename =~ s/$hex_bslash/\\/g; + $nicename =~ s/$hex_space/ /g; + $nicename =~ s/$hex_colon/:/g; + &makegraph($machine, "$type-$name", "Disk IO for $machine on $nicename", "bytes per second", \@data, \@rawdata); + } + if($rrd =~ /^(net)-(\S+)\.rrd$/) { + my(@data); + my(@rawdata); + push @data, "AREA:$1-$2:rx:rx#00FF00:OK:received bytes "; + push @data, "LINE2:$1-$2:tx:tx#0000FF:OK:transfered bytes"; + push @rawdata, "--base=1024"; + # some name tidying + my($type) = $1; + my($name) = $2; + my($nicename) = $2; + $nicename =~ s/$hex_slash/\//g; + $nicename =~ s/$hex_underscore/_/g; + $nicename =~ s/$hex_bslash/\\/g; + $nicename =~ s/$hex_space/ /g; + $nicename =~ s/$hex_colon/:/g; + $nicename =~ s/$hex_lbracket/(/g; + $nicename =~ s/$hex_rbracket/)/g; + $nicename =~ s/$hex_plus/+/g; + $nicename =~ s/$hex_hash/#/g; + &makegraph($machine, "$type-$name", "Network IO for $machine on $nicename", "bytes per second", \@data, \@rawdata); + } + if($rrd =~ /^(mailq)-(\S+)\.rrd$/) { + my(@data); + my(@rawdata); + push @data, "LINE2:$1-$2:size:size#0000FF:OK:messages"; + &makegraph($machine, "$1-$2", "Mail Queue ($2) Size for $machine", "messages in queue", \@data, \@rawdata); + } # probably a queue with a name like this :) if($rrd =~ /^(\d+)_0\.rrd$/) { my(@data); @@ -176,11 +303,10 @@ foreach my $machine (@rrddirlist) { my($baserrd) = $1; my($i) = 0; while( -f "$rrddir/$machine/$baserrd\_$i.rrd" ) { - push @data, "LINE2:$baserrd\_$i:size:size$i" . &get_colour($i) . ":queue$i size "; + push @data, "LINE2:$baserrd\_$i:size:size$i" . &get_colour($i) . ":OK:queue$i size "; ++$i; } - push @data, "LINE2:$baserrd\_0:total:total#FF0000:packets/sec - currently"; - push @rawdata, "GPRINT:total:LAST:%lf %spackets/sec"; + push @data, "LINE2:$baserrd\_0:total:total#FF0000:OK:packets/sec "; my($comment); if(-f "$rrddir/$machine/$baserrd.def") { open(DEF, "$rrddir/$machine/$baserrd.def"); @@ -188,7 +314,7 @@ foreach my $machine (@rrddirlist) { chomp $comment if defined $comment; } $comment = "unknown queue" if not defined $comment; - &makegraph($machine, $baserrd, $comment, \@data, \@rawdata); + &makegraph($machine, $baserrd, $comment, "", \@data, \@rawdata); } } # have a last check, maybe we can remove the directory now? @@ -200,6 +326,7 @@ foreach my $machine (@rrddirlist) { closedir DIR; if($#dirlist == -1) { rmdir "$rrddir/$machine"; + &log("deleting empty rrd directory $rrddir/$machine\n"); } } } @@ -208,10 +335,10 @@ if($deleteimgs) { # Read the contents of the graphs directory # and pull out the list of subdirectories (except . and .. :) opendir(DIR, $imgdir); - my(@imgdirlist) = grep { -d "$imgdir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); + my(@imgdirlist) = sort grep { -d "$imgdir/$_" && !/^\.$/ && !/^\.\.$/ } readdir(DIR); closedir DIR; - # look through each directoty, as they might + # look through each directory, as they might # contain images for a particular machine foreach my $machine (@imgdirlist) { # Read the contents of the directory @@ -229,6 +356,7 @@ if($deleteimgs) { if((time - $mtime) > $maximgage) { # if so, delete it unlink("$imgdir/$machine/$img"); + &log("deleted old image $imgdir/$machine/$img\n"); } } # have a last check, maybe we can remove the directory now? @@ -238,6 +366,7 @@ if($deleteimgs) { closedir DIR; if($#dirlist == -1) { rmdir "$imgdir/$machine"; + &log("deleted empty image directory $imgdir/$machine\n"); } } } @@ -254,15 +383,18 @@ exit(0); # (eg. cpu) # $title = the title for the graph # (eg. kernow CPU usage) +# $vlabel = the vertical label to apply to the left side of the graph +# (eg. kb/s) # $dataref = a reference to an array containing information for the graph -# elements of format: "gtype:rrdname:dsname:name#colour:comment with spaces" +# elements of format: "gtype:rrdname:dsname:name#colour:legend:comment with spaces" # (if gtype is "NONE" only a DEF of 'name' will be defined, no line will be plotted) +# (if legend is "NONE" the latest/average/max/min legend won't be printed) # $rawcmdref = a reference to an array containing raw rrd commands # elements a single command each, no spaces # sub makegraph() { - my($machine, $type, $title, $dataref, $rawcmdref) = @_; + my($machine, $type, $title, $vlabel, $dataref, $rawcmdref) = @_; # pass in these arrays by reference my(@data) = @$dataref if defined $dataref; my(@rawcmd) = @$rawcmdref if defined $rawcmdref; @@ -270,43 +402,66 @@ sub makegraph() { if(! -d "$imgdir/$machine") { # not sure on this umask, but it seems to work? mkdir "$imgdir/$machine", 0777; + &log("created directory $imgdir/$machine\n"); } my(@rrdcmd); foreach my $dataitem (@data) { - # dataitem should be: "gtype:rrdname:dsname:name#colour:comment with spaces" + # dataitem should be: "gtype:rrdname:dsname:name#colour:legend:comment with spaces" # (if gtype is "NONE" only a DEF of 'name' will be defined, no line will be plotted) - if($dataitem =~ /^(\S+):(\S+):(\S+):(\S+)#(.{6}):(.*)$/) { + # (if legend is "NONE" the latest/average/max/min legend won't be printed) + if($dataitem =~ /^(\S+):(\S+):(\S+):(\S+)#(.{6}):(\S+):(.*)$/) { push @rrdcmd, "DEF:$4=$rrddir/$machine/$2.rrd:$3:AVERAGE"; if($1 ne "NONE") { - push @rrdcmd, "$1:$4#$5:$6"; + push @rrdcmd, "$1:$4#$5:$7"; + if($6 ne "NONE") { + # add some nice values to the legend + &addlegend(\@rrdcmd, $4); + } } } } push @rrdcmd, "--title=$title"; push @rrdcmd, "--imgformat=PNG"; push @rrdcmd, "--lower-limit=0"; + push @rrdcmd, "--vertical-label=$vlabel"; + push @rrdcmd, "--width=$imgwidth"; + push @rrdcmd, "--height=$imgheight"; # not entirely convinced this is good... push @rrdcmd, "--alt-autoscale-max"; # add any further raw commands push @rrdcmd, @rawcmd; RRDs::graph ("$imgdir/$machine/$type-3h.png", "--start=-10800", @rrdcmd); my($err_3h) = RRDs::error; - print STDERR "Error generating 3h graph for $machine/$type: $err_3h\n" if $err_3h; + &log("created $imgdir/$machine/$type-3h.png\n") unless $err_3h; + &error("Error generating 3h graph for $machine/$type: $err_3h\n") if $err_3h; RRDs::graph ("$imgdir/$machine/$type-1d.png", "--start=-86400", @rrdcmd); my($err_1d) = RRDs::error; - print STDERR "Error generating 1d graph for $machine/$type: $err_1d\n" if $err_1d; + &log("created $imgdir/$machine/$type-1d.png\n") unless $err_1d; + &error("Error generating 1d graph for $machine/$type: $err_1d\n") if $err_1d; RRDs::graph ("$imgdir/$machine/$type-1w.png", "--start=-604800", @rrdcmd); my($err_1w) = RRDs::error; - print STDERR "Error generating 1w graph for $machine/$type: $err_1w\n" if $err_1w; + &log("created $imgdir/$machine/$type-1w.png\n") unless $err_1w; + &error("Error generating 1w graph for $machine/$type: $err_1w\n") if $err_1w; RRDs::graph ("$imgdir/$machine/$type-1m.png", "--start=-2678400", @rrdcmd); my($err_1m) = RRDs::error; - print STDERR "Error generating 1m graph for $machine/$type: $err_1m\n" if $err_1m; + &log("created $imgdir/$machine/$type-1m.png\n") unless $err_1m; + &error("Error generating 1m graph for $machine/$type: $err_1m\n") if $err_1m; RRDs::graph ("$imgdir/$machine/$type-1y.png", "--start=-31536000", @rrdcmd); my($err_1y) = RRDs::error; - print STDERR "Error generating 1y graph for $machine/$type: $err_1y\n" if $err_1y; + &log("created $imgdir/$machine/$type-1y.png\n") unless $err_1y; + &error("Error generating 1y graph for $machine/$type: $err_1y\n") if $err_1y; return; } +# subroutine to add a legend +# accepts reference to an array and a name +sub addlegend() { + my($dataref, $name) = @_; + push @$dataref, "GPRINT:$name:LAST:Current\\: \%8.2lf %s"; + push @$dataref, "GPRINT:$name:AVERAGE:Average\\: \%8.2lf %s"; + push @$dataref, "GPRINT:$name:MAX:Max\\: \%8.2lf %s\\n"; +} + # hacky subroutine to return a colour # could be done much better somehow :/ sub get_colour { @@ -329,4 +484,29 @@ sub get_colour { else { return "#000066"; } +} + +# prints out usage information then exits +sub usage() { + print "Usage: graph.pl [options]\n"; + print "Options\n"; + print " -c config Specifies the configuration file\n"; + print " default: rrdgraphing.conf\n"; + print " -v Be verbose about what's happening\n"; + print " -q Be quiet, even supress errors\n"; + print " -V Print version number\n"; + print " -h Prints this help page\n"; + exit(1); +} + +# prints a log message if verbose is turned on +sub log() { + my($msg) = @_; + print $msg if $verbose; +} + +# prints an error message unless quiet is turned on +sub error() { + my($msg) = @_; + print STDERR $msg unless $quiet; }