ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/projects/cms/source/reports/rrdgraphing/watch.pl
Revision: 1.2
Committed: Sat May 18 18:15:59 2002 UTC (22 years, 7 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.1: +20 -1 lines
Log Message:
i-scream is now licensed under the GPL. I've added the GPL headers to every
source file, and put a full copy of the license in the appropriate places.
I think I've covered everything. This is going to be a mad commit ;)

File Contents

# User Rev Content
1 tdb 1.1 #!/usr/bin/perl -w
2    
3 tdb 1.2 #
4     # i-scream central monitoring system
5     # Copyright (C) 2000-2002 i-scream
6     #
7     # This program is free software; you can redistribute it and/or
8     # modify it under the terms of the GNU General Public License
9     # as published by the Free Software Foundation; either version 2
10     # of the License, or (at your option) any later version.
11     #
12     # This program is distributed in the hope that it will be useful,
13     # but WITHOUT ANY WARRANTY; without even the implied warranty of
14     # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15     # GNU General Public License for more details.
16     #
17     # You should have received a copy of the GNU General Public License
18     # along with this program; if not, write to the Free Software
19     # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20     #
21    
22 tdb 1.1 # -----------------------------------------------------------
23     # i-scream graph generation scripts
24     # http://www.i-scream.org.uk
25     #
26     # Generates rrd databases for i-scream data by connecting to
27     # the i-scream server and collecting data.
28     #
29     # $Author: tdb $
30 tdb 1.2 # $Id: watch.pl,v 1.1 2002/03/18 13:24:31 tdb Exp $
31 tdb 1.1 #------------------------------------------------------------
32    
33     ## TODO
34     # ought to think about cleaning up when we restart?
35     # -- old queue data etc
36    
37     $| = 1;
38    
39     use strict;
40     use iscream::XMLParser;
41     use IO::Socket;
42     use RRDs;
43    
44     # Base directory for images
45     # (a directory will be constructed for each host under this)
46     my($imgdir) = "/home/pkg/iscream/public_html/graphs";
47    
48     # Location of RRD databases
49     my($rrddir) = "/u1/i-scream/databases";
50    
51     # for reference:
52     # ch -> hex: $hex = sprintf("%02x", ord($ch));
53     # hex -> ch: $ch = chr(hex($hex));
54    
55     # / converted to a decimal then hex'd
56     my($hex_slash) = "_2f";
57     # _ converted to a decimal then hex'd
58     my($hex_underscore) = "_5f";
59    
60     # step interval in the rrd databases
61     my($rrdstep) = 15;
62    
63     # time to wait (in seconds) before retrying a connection
64     my($retry_wait) = 10;
65    
66     if (@ARGV != 2) {
67     die "Usage: watch.pl [i-scream client interface] [TCP port]\n";
68     }
69    
70     # user supplied client interface server and port
71     my($addr) = $ARGV[0];
72     my($cport) = $ARGV[1];
73    
74     while(1) {
75    
76     print "Connecting control channel to port $cport on $addr...\n";
77    
78     # attempt to connect the control channel
79     my($csock) = new IO::Socket::INET(
80     PeerAddr => $addr,
81     PeerPort => $cport,
82     Proto => 'tcp'
83     );
84    
85     # if socket isn't defined connection failed
86     if (!defined $csock) {
87     print STDERR "ERROR: Could not connect control channel to $addr:$cport.\n";
88     print STDERR "Please check that there is an i-scream server at this address.\n";
89     &wait_then_retry();
90     next;
91     }
92    
93     my($response);
94    
95     # client interface should send it's protocol ID
96     # we know about "PROTOCOL 1.1", and will only accept the same
97     $response = <$csock>;
98     if ($response && $response ne "PROTOCOL 1.1\n") {
99     print STDERR "The i-scream server sent an unexpected protocol ID: $response\n";
100     close($csock);
101     &wait_then_retry();
102     next;
103     }
104    
105     # send our identifier to the client interface
106     print $csock "rrdgraphing\n";
107     $response = <$csock>;
108     if ($response && $response ne "OK\n") {
109     print STDERR "Received unexpected response: $response\n";
110     close($csock);
111     &wait_then_retry();
112     next;
113     }
114    
115     # tell the client interface we'd like to start the data channel
116     print $csock "STARTDATA\n";
117    
118     # the response should be the socket to connect the data channel to
119     $response = <$csock>;
120     chomp $response;
121    
122     my($dport) = $response;
123     print "Connecting data channel to port $dport on $addr...\n";
124    
125     # attempt to connect the data channel
126     my($dsock) = new IO::Socket::INET(
127     PeerAddr => $addr,
128     PeerPort => $dport,
129     Proto => 'tcp'
130     ) or die "arse?";
131    
132     # if socket isn't defined connection failed
133     if (!defined $dsock) {
134     print STDERR "ERROR: Could not connect data channel to $addr:$dport.\n";
135     print STDERR "Failure in communications.\n";
136     close($csock);
137     &wait_then_retry();
138     next;
139     }
140    
141     # the data channel should now be sending us data!
142    
143     # call sub to process data being received over the data channel
144     &processdata($dsock);
145    
146     # data processing has stopped, close sockets
147     close($csock);
148     close($dsock);
149    
150     # wait before retrying
151     &wait_then_retry();
152     }
153    
154     # we'll never reach here... unless 1 becomes false for some reason ;)
155     exit 0;
156    
157    
158     #
159     # wait for a while before retrying
160     #
161     sub wait_then_retry() {
162     print STDERR "Will retry connection to i-scream server in $retry_wait seconds.\n\n";
163     sleep $retry_wait;
164     }
165    
166     #
167     # Given the socket of the data channel will process all
168     # the incoming XML data, creating and updating the appropriate
169     # database files.
170     #
171     # $dsock = socket connected to the data channel
172     #
173     sub processdata() {
174     # the socket connected to the data channel
175     my($dsock) = @_;
176     # save us recreating this variable each time we loop
177     my($xml);
178    
179     while(1) {
180     # read data
181     $xml = <$dsock>;
182    
183     # something odd has happened
184     last if not defined $xml;
185    
186     # attempt to parse the data
187     my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
188     if($err) {
189     print STDERR "Skipped, XML did not parse: $xml";
190     next;
191     }
192    
193     # standard data packet
194     if($xmlhash{"packet.attributes.type"} eq "data") {
195     my($machine) = $xmlhash{"packet.attributes.machine_name"};
196     my($date) = $xmlhash{"packet.attributes.date"};
197    
198     # make directory for machine
199     if(! -d "$rrddir/$machine") {
200     # not sure on this umask, but it seems to work?
201     mkdir "$rrddir/$machine", 0777;
202     }
203    
204     my(@data);
205    
206     # cpu
207     @data = ( "packet.cpu.idle:idle:GAUGE",
208     "packet.cpu.user:user:GAUGE",
209     "packet.cpu.kernel:kernel:GAUGE",
210     "packet.cpu.swap:swap:GAUGE",
211     "packet.cpu.iowait:iowait:GAUGE",
212     );
213     &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
214    
215     # mem
216     @data = ( "packet.memory.free:free:GAUGE",
217     "packet.memory.total:total:GAUGE",
218     );
219     &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
220    
221     # load
222     @data = ( "packet.load.load1:load1:GAUGE",
223     "packet.load.load5:load5:GAUGE",
224     "packet.load.load15:load15:GAUGE",
225     );
226     &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
227    
228     # processes
229     @data = ( "packet.processes.cpu:cpu:GAUGE",
230     "packet.processes.sleeping:sleeping:GAUGE",
231     "packet.processes.stopped:stopped:GAUGE",
232     "packet.processes.total:total:GAUGE",
233     "packet.processes.zombie:zombie:GAUGE",
234     );
235     &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
236    
237     # swap
238     @data = ( "packet.swap.free:free:GAUGE",
239     "packet.swap.total:total:GAUGE",
240     );
241     &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
242    
243     # users
244     @data = ( "packet.users.count:count:GAUGE",
245     );
246     &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
247    
248     # disk
249     my($i) = 0;
250     while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
251     my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
252     $mount =~ s/_/$hex_underscore/g;
253     $mount =~ s/\//$hex_slash/g;
254     @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
255     "packet.disk.p$i.attributes.used:used:GAUGE",
256     );
257     &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
258     ++$i;
259     }
260     }
261    
262     # queue statistics packet
263     elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
264     # psuedo machine for internal server stuff
265     my($machine) = "i-scream-server";
266     # make directory
267     if(! -d "$rrddir/$machine") {
268     # not sure on this umask, but it seems to work?
269     mkdir "$rrddir/$machine", 0777;
270     }
271     my($hash) = $xmlhash{"packet.attributes.hashCode"};
272     my($date) = $xmlhash{"packet.attributes.date"};
273     my($name) = $xmlhash{"packet.attributes.name"};
274     # take a look to see if we have a shutdown packet...
275     if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
276     unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
277     next;
278     }
279     # look through to see how many internal queues we have
280     my($i) = 0;
281     while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
282     # see if the queue has been removed
283     if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
284     # delete the queues rrd
285     unlink "$rrddir/$machine/$hash\_$i.rrd";
286     # are there any other rrd's left on this queue? if not, cleanup.
287     # get a list of any that may be still there..
288     opendir(DIR, "$rrddir/$machine");
289     my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
290     closedir DIR;
291     # count them (+1 because an empty array is size -1)
292     my($rrdcount) = $#rrdcountfiles + 1;
293     if($rrdcount == 0) {
294     # clean up the def file and any images
295     unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
296     }
297     ++$i;
298     next;
299     }
300     # the &updaterrd will also do this check, but we want
301     # to write our def file out first
302     if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
303     open(DEF, ">$rrddir/$machine/$hash.def");
304     print DEF $name;
305     close DEF;
306     }
307     my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
308     "packet.queue.attributes.total:total:COUNTER",
309     );
310     &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
311     ++$i;
312     }
313     }
314     else {
315     #print STDERR "SKIPPED: valid xml, but not a data or statistics packet\n";
316     }
317     }
318    
319     # we'll now return from this sub and reconnect
320     print STDERR "Data channel socket gave no data, bailing out...\n";
321     }
322    
323     #
324     # sub to update an rrd file
325     #
326     # $machine = name of the machine
327     # (eg. kernow.ukc.ac.uk)
328     # $type = the type of graph for the machine
329     # (eg. cpu)
330     # $date = the date of the item we want to add
331     # (in seconds since the epoch)
332     # $step = the interval at which the database steps
333     # used when we create a new rrd
334     # $xmlref = reference to the xml data packet
335     # @data = array containing data items to add
336     # (eg. "packet.cpu.user:user:GAUGE")
337     #
338     sub updaterrd() {
339     my($machine, $type, $date, $step, $xmlref, @data) = @_;
340     # get hold of the xmlhash we have a reference to
341     my(%xmlhash) = %$xmlref;
342     # check if we need to create a new rrd
343     if( ! -f "$rrddir/$machine/$type.rrd") {
344     my(@createdata);
345     # pull the details out of the data we've been given
346     foreach my $dataitem (@data) {
347     if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
348     push @createdata, "$1:$2";
349     }
350     }
351     # call the &makerrd to actually create the rrd
352     print "making new rrd for $rrddir/$machine/$type.rrd\n";
353     &makerrd($machine, $type, $date, $step, @createdata);
354     }
355     # get the details out of the data we've been given
356     my($updateparams) = "$date";
357     foreach my $dataitem (@data) {
358     if($dataitem =~ /^(\S+):\S+:\S+$/) {
359     # pull the values straight out of the xmlhash
360     my($value) = $xmlhash{$1};
361     # if it's undefined we'll set it to 0
362     # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
363     $value = "0" if not defined $value;
364     $updateparams .= ":$value";
365     }
366     }
367     # perform the update
368     RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
369     my($err) = RRDs::error;
370     print STDERR "Error updating $rrddir/$machine/$type.rrd: $err\n" if $err;
371     }
372    
373     #
374     # sub to create a new rrd file
375     #
376     # $machine = name of the machine
377     # (eg. kernow.ukc.ac.uk)
378     # $type = the type of graph for the machine
379     # (eg. cpu)
380     # $start = the date of the first item we want to add
381     # (in seconds since the epoch)
382     # $step = the interval at which the database steps
383     # @data = the data items we want to put in the rrd
384     # in the form: $dsname:dstype
385     # (eg. "size:GAUGE")
386     #
387     sub makerrd() {
388     my($machine, $type, $start, $step, @data) = @_;
389     # check if directory exists for rrd
390     if(! -d "$rrddir/$machine") {
391     # not sure on this umask, but it seems to work?
392     mkdir "$rrddir/$machine", 0777;
393     }
394     my(@rrdcmd);
395     # we'll want to add our first data item at $start,
396     # so we start our rrd $step before that.
397     $start -= $step;
398     push @rrdcmd, "$rrddir/$machine/$type.rrd";
399     push @rrdcmd, "--start=$start";
400     push @rrdcmd, "--step=$step";
401     foreach my $dataitem (@data) {
402     # dataitem should be: "dsname:dstype"
403     if($dataitem =~ /^(\S+):(\S+)$/) {
404     push @rrdcmd, "DS:$1:$2:600:U:U";
405     }
406     }
407     push @rrdcmd, (
408     # 3h in 15s samples
409     "RRA:AVERAGE:0.5:1:720",
410     "RRA:MAX:0.5:1:720",
411     # 1d in 2m samples
412     "RRA:AVERAGE:0.5:8:720",
413     "RRA:MAX:0.5:8:720",
414     # 1w in 15m samples
415     "RRA:AVERAGE:0.5:60:672",
416     "RRA:MAX:0.5:60:672",
417     # 1m in 1hr samples
418     "RRA:AVERAGE:0.5:240:744",
419     "RRA:MAX:0.5:240:744",
420     # 1y in 12hr samples
421     "RRA:AVERAGE:0.5:2880:730",
422     "RRA:MAX:0.5:2880:730",
423     );
424     RRDs::create (@rrdcmd);
425     my($err) = RRDs::error;
426     print STDERR "Error creating rrd for $rrddir/$machine/$type: $err\n" if $err;
427     }