ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/experimental/reports/graphing/watch.pl
Revision: 1.6
Committed: Wed Mar 13 20:50:48 2002 UTC (22 years, 7 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.5: +5 -2 lines
Log Message:
Added a graph of "the last year". Also fixed a minor bug with the script to
create the rrd's. As an aside, I've noticed the grid on the graphs seems to
differ between the different graphs. I'm sure this is because I've got some
of my numbers a bit silly ;) oh well :)

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 # -----------------------------------------------------------
4     # i-scream graph generation scripts
5     # http://www.i-scream.org.uk
6     #
7     # Generates rrd databases for i-scream data by connecting to
8     # the i-scream server and collecting data.
9     #
10 tdb 1.5 # $Author: tdb $
11 tdb 1.6 # $Id: watch.pl,v 1.5 2002/03/10 15:42:19 tdb Exp $
12 tdb 1.2 #------------------------------------------------------------
13    
14 tdb 1.3 ## TODO
15     # ought to think about cleaning up when we restart?
16     # -- old queue data etc
17    
18 tdb 1.1 $| = 1;
19    
20     use strict;
21     use iscream::XMLParser;
22     use IO::Socket;
23     use RRDs;
24    
25 tdb 1.2 # Base directory for images
26     # (a directory will be constructed for each host under this)
27     my($imgdir) = "/home/tdb/public_html/rrd";
28    
29     # Location of RRD databases
30     my($rrddir) = "/u1/i-scream/rrd";
31    
32     # for reference:
33     # ch -> hex: $hex = sprintf("%02x", ord($ch));
34     # hex -> ch: $ch = chr(hex($hex));
35    
36     # / converted to a decimal then hex'd
37     my($hex_slash) = "_2f";
38     # _ converted to a decimal then hex'd
39     my($hex_underscore) = "_5f";
40    
41 tdb 1.3 # step interval in the rrd databases
42     my($rrdstep) = 15;
43    
44     # time to wait (in seconds) before retrying a connection
45     my($retry_wait) = 10;
46    
47 tdb 1.1 if (@ARGV != 2) {
48 tdb 1.3 die "Usage: watch.pl [i-scream client interface] [TCP port]\n";
49 tdb 1.1 }
50    
51 tdb 1.3 # user supplied client interface server and port
52 tdb 1.1 my($addr) = $ARGV[0];
53     my($cport) = $ARGV[1];
54    
55 tdb 1.3 while(1) {
56    
57     print "Connecting control channel to port $cport on $addr...\n";
58    
59     # attempt to connect the control channel
60     my($csock) = new IO::Socket::INET(
61     PeerAddr => $addr,
62     PeerPort => $cport,
63     Proto => 'tcp'
64     );
65    
66     # if socket isn't defined connection failed
67     if (!defined $csock) {
68     print STDERR "ERROR: Could not connect control channel to $addr:$cport.\n";
69     print STDERR "Please check that there is an i-scream server at this address.\n";
70     &wait_then_retry();
71     next;
72     }
73    
74     my($response);
75    
76     # client interface should send it's protocol ID
77     # we know about "PROTOCOL 1.1", and will only accept the same
78     $response = <$csock>;
79     if ($response && $response ne "PROTOCOL 1.1\n") {
80     print STDERR "The i-scream server sent an unexpected protocol ID: $response\n";
81     close($csock);
82     &wait_then_retry();
83     next;
84     }
85    
86     # send our identifier to the client interface
87     print $csock "rrdgraphing\n";
88     $response = <$csock>;
89     if ($response && $response ne "OK\n") {
90     print STDERR "Received unexpected response: $response\n";
91     close($csock);
92     &wait_then_retry();
93     next;
94     }
95    
96     # tell the client interface we'd like to start the data channel
97     print $csock "STARTDATA\n";
98    
99     # the response should be the socket to connect the data channel to
100     $response = <$csock>;
101     chomp $response;
102    
103     my($dport) = $response;
104     print "Connecting data channel to port $dport on $addr...\n";
105    
106     # attempt to connect the data channel
107     my($dsock) = new IO::Socket::INET(
108     PeerAddr => $addr,
109     PeerPort => $dport,
110     Proto => 'tcp'
111     ) or die "arse?";
112    
113     # if socket isn't defined connection failed
114     if (!defined $dsock) {
115     print STDERR "ERROR: Could not connect data channel to $addr:$dport.\n";
116     print STDERR "Failure in communications.\n";
117     close($csock);
118     &wait_then_retry();
119     next;
120     }
121    
122     # the data channel should now be sending us data!
123    
124     # call sub to process data being received over the data channel
125     &processdata($dsock);
126    
127     # data processing has stopped, close sockets
128 tdb 1.1 close($csock);
129 tdb 1.3 close($dsock);
130    
131     # wait before retrying
132     &wait_then_retry();
133 tdb 1.1 }
134    
135 tdb 1.3 # we'll never reach here... unless 1 becomes false for some reason ;)
136     exit 0;
137 tdb 1.1
138    
139 tdb 1.3 #
140     # wait for a while before retrying
141     #
142     sub wait_then_retry() {
143     print STDERR "Will retry connection to i-scream server in $retry_wait seconds.\n\n";
144     sleep $retry_wait;
145 tdb 1.1 }
146    
147 tdb 1.3 #
148     # Given the socket of the data channel will process all
149     # the incoming XML data, creating and updating the appropriate
150     # database files.
151     #
152     # $dsock = socket connected to the data channel
153     #
154     sub processdata() {
155     # the socket connected to the data channel
156     my($dsock) = @_;
157     # save us recreating this variable each time we loop
158     my($xml);
159 tdb 1.2
160 tdb 1.3 while(1) {
161     # read data
162     $xml = <$dsock>;
163    
164     # something odd has happened
165     last if not defined $xml;
166 tdb 1.2
167 tdb 1.3 # attempt to parse the data
168     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
169     if($err) {
170     print STDERR "Skipped, XML did not parse: $xml";
171     next;
172 tdb 1.1 }
173 tdb 1.2
174 tdb 1.3 # standard data packet
175     if($xmlhash{"packet.attributes.type"} eq "data") {
176     my($machine) = $xmlhash{"packet.attributes.machine_name"};
177     my($date) = $xmlhash{"packet.attributes.date"};
178    
179     # make directory for machine
180     if(! -d "$rrddir/$machine") {
181     # not sure on this umask, but it seems to work?
182     mkdir "$rrddir/$machine", 0777;
183     }
184    
185     my(@data);
186    
187     # cpu
188     @data = ( "packet.cpu.idle:idle:GAUGE",
189     "packet.cpu.user:user:GAUGE",
190     "packet.cpu.kernel:kernel:GAUGE",
191     "packet.cpu.swap:swap:GAUGE",
192     "packet.cpu.iowait:iowait:GAUGE",
193     );
194     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
195    
196     # mem
197     @data = ( "packet.memory.free:free:GAUGE",
198     "packet.memory.total:total:GAUGE",
199     );
200     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
201    
202     # load
203     @data = ( "packet.load.load1:load1:GAUGE",
204     "packet.load.load5:load5:GAUGE",
205     "packet.load.load15:load15:GAUGE",
206     );
207     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
208    
209     # processes
210     @data = ( "packet.processes.cpu:cpu:GAUGE",
211     "packet.processes.sleeping:sleeping:GAUGE",
212     "packet.processes.stopped:stopped:GAUGE",
213     "packet.processes.total:total:GAUGE",
214     "packet.processes.zombie:zombie:GAUGE",
215     );
216     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
217    
218     # swap
219     @data = ( "packet.swap.free:free:GAUGE",
220     "packet.swap.total:total:GAUGE",
221     );
222     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
223    
224     # users
225     @data = ( "packet.users.count:count:GAUGE",
226 tdb 1.2 );
227 tdb 1.3 &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
228    
229     # disk
230     my($i) = 0;
231     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
232     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
233     $mount =~ s/_/$hex_underscore/g;
234     $mount =~ s/\//$hex_slash/g;
235     @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
236     "packet.disk.p$i.attributes.used:used:GAUGE",
237     );
238     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
239     ++$i;
240     }
241 tdb 1.1 }
242 tdb 1.3
243     # queue statistics packet
244     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
245     # psuedo machine for internal server stuff
246     my($machine) = "i-scream-server";
247     # make directory
248     if(! -d "$rrddir/$machine") {
249     # not sure on this umask, but it seems to work?
250     mkdir "$rrddir/$machine", 0777;
251     }
252     my($hash) = $xmlhash{"packet.attributes.hashCode"};
253     my($date) = $xmlhash{"packet.attributes.date"};
254     my($name) = $xmlhash{"packet.attributes.name"};
255     # take a look to see if we have a shutdown packet...
256     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
257     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
258     next;
259     }
260     # look through to see how many internal queues we have
261     my($i) = 0;
262     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
263     # see if the queue has been removed
264     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
265     # delete the queues rrd
266     unlink "$rrddir/$machine/$hash\_$i.rrd";
267     # are there any other rrd's left on this queue? if not, cleanup.
268     # get a list of any that may be still there..
269     opendir(DIR, "$rrddir/$machine");
270     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
271     closedir DIR;
272     # count them (+1 because an empty array is size -1)
273     my($rrdcount) = $#rrdcountfiles + 1;
274     if($rrdcount == 0) {
275     # clean up the def file and any images
276     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
277     }
278     ++$i;
279     next;
280     }
281     # the &updaterrd will also do this check, but we want
282     # to write our def file out first
283     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
284     open(DEF, ">$rrddir/$machine/$hash.def");
285     print DEF $name;
286     close DEF;
287 tdb 1.1 }
288 tdb 1.3 my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
289 tdb 1.4 "packet.queue.attributes.total:total:COUNTER",
290 tdb 1.3 );
291     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
292 tdb 1.2 ++$i;
293 tdb 1.1 }
294 tdb 1.3 }
295     else {
296     #print STDERR "SKIPPED: valid xml, but not a data or statistics packet\n";
297 tdb 1.1 }
298     }
299 tdb 1.3
300     # we'll now return from this sub and reconnect
301     print STDERR "Data channel socket gave no data, bailing out...\n";
302 tdb 1.1 }
303    
304 tdb 1.2 #
305     # sub to update an rrd file
306     #
307     # $machine = name of the machine
308     # (eg. kernow.ukc.ac.uk)
309     # $type = the type of graph for the machine
310     # (eg. cpu)
311     # $date = the date of the item we want to add
312     # (in seconds since the epoch)
313     # $step = the interval at which the database steps
314     # used when we create a new rrd
315     # $xmlref = reference to the xml data packet
316     # @data = array containing data items to add
317     # (eg. "packet.cpu.user:user:GAUGE")
318     #
319     sub updaterrd() {
320     my($machine, $type, $date, $step, $xmlref, @data) = @_;
321     # get hold of the xmlhash we have a reference to
322     my(%xmlhash) = %$xmlref;
323     # check if we need to create a new rrd
324     if( ! -f "$rrddir/$machine/$type.rrd") {
325     my(@createdata);
326     # pull the details out of the data we've been given
327     foreach my $dataitem (@data) {
328     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
329     push @createdata, "$1:$2";
330     }
331     }
332     # call the &makerrd to actually create the rrd
333     print "making new rrd for $rrddir/$machine/$type.rrd\n";
334     &makerrd($machine, $type, $date, $step, @createdata);
335     }
336     # get the details out of the data we've been given
337     my($updateparams) = "$date";
338     foreach my $dataitem (@data) {
339     if($dataitem =~ /^(\S+):\S+:\S+$/) {
340     # pull the values straight out of the xmlhash
341 tdb 1.5 my($value) = $xmlhash{$1};
342     # if it's undefined we'll set it to 0
343     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
344     $value = "0" if not defined $value;
345     $updateparams .= ":$value";
346 tdb 1.2 }
347     }
348     # perform the update
349     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
350     my($err) = RRDs::error;
351     print STDERR "Error updating $rrddir/$machine/$type.rrd: $err\n" if $err;
352     }
353    
354     #
355     # sub to create a new rrd file
356     #
357     # $machine = name of the machine
358     # (eg. kernow.ukc.ac.uk)
359     # $type = the type of graph for the machine
360     # (eg. cpu)
361     # $start = the date of the first item we want to add
362     # (in seconds since the epoch)
363     # $step = the interval at which the database steps
364     # @data = the data items we want to put in the rrd
365     # in the form: $dsname:dstype
366     # (eg. "size:GAUGE")
367     #
368     sub makerrd() {
369     my($machine, $type, $start, $step, @data) = @_;
370     # check if directory exists for rrd
371     if(! -d "$rrddir/$machine") {
372     # not sure on this umask, but it seems to work?
373     mkdir "$rrddir/$machine", 0777;
374     }
375     my(@rrdcmd);
376     # we'll want to add our first data item at $start,
377     # so we start our rrd $step before that.
378     $start -= $step;
379     push @rrdcmd, "$rrddir/$machine/$type.rrd";
380     push @rrdcmd, "--start=$start";
381     push @rrdcmd, "--step=$step";
382     foreach my $dataitem (@data) {
383     # dataitem should be: "dsname:dstype"
384     if($dataitem =~ /^(\S+):(\S+)$/) {
385     push @rrdcmd, "DS:$1:$2:600:U:U";
386     }
387     }
388     push @rrdcmd, (
389     # 3h in 15s samples
390     "RRA:AVERAGE:0.5:1:720",
391     "RRA:MAX:0.5:1:720",
392     # 1d in 2m samples
393     "RRA:AVERAGE:0.5:8:720",
394     "RRA:MAX:0.5:8:720",
395     # 1w in 15m samples
396     "RRA:AVERAGE:0.5:60:672",
397     "RRA:MAX:0.5:60:672",
398     # 1m in 1hr samples
399     "RRA:AVERAGE:0.5:240:744",
400 tdb 1.6 "RRA:MAX:0.5:240:744",
401     # 1y in 12hr samples
402     "RRA:AVERAGE:0.5:2880:730",
403     "RRA:MAX:0.5:2880:730",
404 tdb 1.2 );
405     RRDs::create (@rrdcmd);
406     my($err) = RRDs::error;
407     print STDERR "Error creating rrd for $rrddir/$machine/$type: $err\n" if $err;
408 tdb 1.1 }