ViewVC Help
View File | Revision Log | Show Annotations | Revision Graph | Root Listing
root/i-scream/experimental/reports/graphing/watch.pl
Revision: 1.6
Committed: Wed Mar 13 20:50:48 2002 UTC (22 years, 2 months ago) by tdb
Content type: text/plain
Branch: MAIN
Changes since 1.5: +5 -2 lines
Log Message:
Added a graph of "the last year". Also fixed a minor bug with the script to
create the rrd's. As an aside, I've noticed the grid on the graphs seems to
differ between the different graphs. I'm sure this is because I've got some
of my numbers a bit silly ;) oh well :)

File Contents

# Content
1 #!/usr/bin/perl -w
2
3 # -----------------------------------------------------------
4 # i-scream graph generation scripts
5 # http://www.i-scream.org.uk
6 #
7 # Generates rrd databases for i-scream data by connecting to
8 # the i-scream server and collecting data.
9 #
10 # $Author: tdb $
11 # $Id: watch.pl,v 1.5 2002/03/10 15:42:19 tdb Exp $
12 #------------------------------------------------------------
13
14 ## TODO
15 # ought to think about cleaning up when we restart?
16 # -- old queue data etc
17
18 $| = 1;
19
20 use strict;
21 use iscream::XMLParser;
22 use IO::Socket;
23 use RRDs;
24
25 # Base directory for images
26 # (a directory will be constructed for each host under this)
27 my($imgdir) = "/home/tdb/public_html/rrd";
28
29 # Location of RRD databases
30 my($rrddir) = "/u1/i-scream/rrd";
31
32 # for reference:
33 # ch -> hex: $hex = sprintf("%02x", ord($ch));
34 # hex -> ch: $ch = chr(hex($hex));
35
36 # / converted to a decimal then hex'd
37 my($hex_slash) = "_2f";
38 # _ converted to a decimal then hex'd
39 my($hex_underscore) = "_5f";
40
41 # step interval in the rrd databases
42 my($rrdstep) = 15;
43
44 # time to wait (in seconds) before retrying a connection
45 my($retry_wait) = 10;
46
47 if (@ARGV != 2) {
48 die "Usage: watch.pl [i-scream client interface] [TCP port]\n";
49 }
50
51 # user supplied client interface server and port
52 my($addr) = $ARGV[0];
53 my($cport) = $ARGV[1];
54
55 while(1) {
56
57 print "Connecting control channel to port $cport on $addr...\n";
58
59 # attempt to connect the control channel
60 my($csock) = new IO::Socket::INET(
61 PeerAddr => $addr,
62 PeerPort => $cport,
63 Proto => 'tcp'
64 );
65
66 # if socket isn't defined connection failed
67 if (!defined $csock) {
68 print STDERR "ERROR: Could not connect control channel to $addr:$cport.\n";
69 print STDERR "Please check that there is an i-scream server at this address.\n";
70 &wait_then_retry();
71 next;
72 }
73
74 my($response);
75
76 # client interface should send it's protocol ID
77 # we know about "PROTOCOL 1.1", and will only accept the same
78 $response = <$csock>;
79 if ($response && $response ne "PROTOCOL 1.1\n") {
80 print STDERR "The i-scream server sent an unexpected protocol ID: $response\n";
81 close($csock);
82 &wait_then_retry();
83 next;
84 }
85
86 # send our identifier to the client interface
87 print $csock "rrdgraphing\n";
88 $response = <$csock>;
89 if ($response && $response ne "OK\n") {
90 print STDERR "Received unexpected response: $response\n";
91 close($csock);
92 &wait_then_retry();
93 next;
94 }
95
96 # tell the client interface we'd like to start the data channel
97 print $csock "STARTDATA\n";
98
99 # the response should be the socket to connect the data channel to
100 $response = <$csock>;
101 chomp $response;
102
103 my($dport) = $response;
104 print "Connecting data channel to port $dport on $addr...\n";
105
106 # attempt to connect the data channel
107 my($dsock) = new IO::Socket::INET(
108 PeerAddr => $addr,
109 PeerPort => $dport,
110 Proto => 'tcp'
111 ) or die "arse?";
112
113 # if socket isn't defined connection failed
114 if (!defined $dsock) {
115 print STDERR "ERROR: Could not connect data channel to $addr:$dport.\n";
116 print STDERR "Failure in communications.\n";
117 close($csock);
118 &wait_then_retry();
119 next;
120 }
121
122 # the data channel should now be sending us data!
123
124 # call sub to process data being received over the data channel
125 &processdata($dsock);
126
127 # data processing has stopped, close sockets
128 close($csock);
129 close($dsock);
130
131 # wait before retrying
132 &wait_then_retry();
133 }
134
135 # we'll never reach here... unless 1 becomes false for some reason ;)
136 exit 0;
137
138
139 #
140 # wait for a while before retrying
141 #
142 sub wait_then_retry() {
143 print STDERR "Will retry connection to i-scream server in $retry_wait seconds.\n\n";
144 sleep $retry_wait;
145 }
146
147 #
148 # Given the socket of the data channel will process all
149 # the incoming XML data, creating and updating the appropriate
150 # database files.
151 #
152 # $dsock = socket connected to the data channel
153 #
154 sub processdata() {
155 # the socket connected to the data channel
156 my($dsock) = @_;
157 # save us recreating this variable each time we loop
158 my($xml);
159
160 while(1) {
161 # read data
162 $xml = <$dsock>;
163
164 # something odd has happened
165 last if not defined $xml;
166
167 # attempt to parse the data
168 my($err, %xmlhash) = &iscream::XMLParser::parse($xml);
169 if($err) {
170 print STDERR "Skipped, XML did not parse: $xml";
171 next;
172 }
173
174 # standard data packet
175 if($xmlhash{"packet.attributes.type"} eq "data") {
176 my($machine) = $xmlhash{"packet.attributes.machine_name"};
177 my($date) = $xmlhash{"packet.attributes.date"};
178
179 # make directory for machine
180 if(! -d "$rrddir/$machine") {
181 # not sure on this umask, but it seems to work?
182 mkdir "$rrddir/$machine", 0777;
183 }
184
185 my(@data);
186
187 # cpu
188 @data = ( "packet.cpu.idle:idle:GAUGE",
189 "packet.cpu.user:user:GAUGE",
190 "packet.cpu.kernel:kernel:GAUGE",
191 "packet.cpu.swap:swap:GAUGE",
192 "packet.cpu.iowait:iowait:GAUGE",
193 );
194 &updaterrd($machine, "cpu", $date, $rrdstep, \%xmlhash, @data);
195
196 # mem
197 @data = ( "packet.memory.free:free:GAUGE",
198 "packet.memory.total:total:GAUGE",
199 );
200 &updaterrd($machine, "mem", $date, $rrdstep, \%xmlhash, @data);
201
202 # load
203 @data = ( "packet.load.load1:load1:GAUGE",
204 "packet.load.load5:load5:GAUGE",
205 "packet.load.load15:load15:GAUGE",
206 );
207 &updaterrd($machine, "load", $date, $rrdstep, \%xmlhash, @data);
208
209 # processes
210 @data = ( "packet.processes.cpu:cpu:GAUGE",
211 "packet.processes.sleeping:sleeping:GAUGE",
212 "packet.processes.stopped:stopped:GAUGE",
213 "packet.processes.total:total:GAUGE",
214 "packet.processes.zombie:zombie:GAUGE",
215 );
216 &updaterrd($machine, "proc", $date, $rrdstep, \%xmlhash, @data);
217
218 # swap
219 @data = ( "packet.swap.free:free:GAUGE",
220 "packet.swap.total:total:GAUGE",
221 );
222 &updaterrd($machine, "swap", $date, $rrdstep, \%xmlhash, @data);
223
224 # users
225 @data = ( "packet.users.count:count:GAUGE",
226 );
227 &updaterrd($machine, "users", $date, $rrdstep, \%xmlhash, @data);
228
229 # disk
230 my($i) = 0;
231 while(defined $xmlhash{"packet.disk.p$i.attributes.mount"}) {
232 my($mount) = $xmlhash{"packet.disk.p$i.attributes.mount"};
233 $mount =~ s/_/$hex_underscore/g;
234 $mount =~ s/\//$hex_slash/g;
235 @data = ( "packet.disk.p$i.attributes.kbytes:kbytes:GAUGE",
236 "packet.disk.p$i.attributes.used:used:GAUGE",
237 );
238 &updaterrd($machine, "disk-$mount", $date, $rrdstep, \%xmlhash, @data);
239 ++$i;
240 }
241 }
242
243 # queue statistics packet
244 elsif($xmlhash{"packet.attributes.type"} eq "queueStat") {
245 # psuedo machine for internal server stuff
246 my($machine) = "i-scream-server";
247 # make directory
248 if(! -d "$rrddir/$machine") {
249 # not sure on this umask, but it seems to work?
250 mkdir "$rrddir/$machine", 0777;
251 }
252 my($hash) = $xmlhash{"packet.attributes.hashCode"};
253 my($date) = $xmlhash{"packet.attributes.date"};
254 my($name) = $xmlhash{"packet.attributes.name"};
255 # take a look to see if we have a shutdown packet...
256 if($xmlhash{"packet.attributes.shutdown"} && $xmlhash{"packet.attributes.shutdown"} eq "true") {
257 unlink <$rrddir/$machine/$hash\_*.rrd>, "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
258 next;
259 }
260 # look through to see how many internal queues we have
261 my($i) = 0;
262 while(defined $xmlhash{"packet.queue.attributes.queue$i"}) {
263 # see if the queue has been removed
264 if($xmlhash{"packet.queue.attributes.queue$i"} eq "[deleted]") {
265 # delete the queues rrd
266 unlink "$rrddir/$machine/$hash\_$i.rrd";
267 # are there any other rrd's left on this queue? if not, cleanup.
268 # get a list of any that may be still there..
269 opendir(DIR, "$rrddir/$machine");
270 my(@rrdcountfiles) = grep { -f "$rrddir/$machine/$_" && /^$hash\_\d+.rrd$/ } readdir(DIR);
271 closedir DIR;
272 # count them (+1 because an empty array is size -1)
273 my($rrdcount) = $#rrdcountfiles + 1;
274 if($rrdcount == 0) {
275 # clean up the def file and any images
276 unlink "$rrddir/$machine/$hash.def", <$imgdir/$machine/$hash*.png>;
277 }
278 ++$i;
279 next;
280 }
281 # the &updaterrd will also do this check, but we want
282 # to write our def file out first
283 if( ! -f "$rrddir/$machine/$hash\_$i.rrd" ) {
284 open(DEF, ">$rrddir/$machine/$hash.def");
285 print DEF $name;
286 close DEF;
287 }
288 my(@data) = ( "packet.queue.attributes.queue$i:size:GAUGE",
289 "packet.queue.attributes.total:total:COUNTER",
290 );
291 &updaterrd($machine, "$hash\_$i", $date, $rrdstep, \%xmlhash, @data);
292 ++$i;
293 }
294 }
295 else {
296 #print STDERR "SKIPPED: valid xml, but not a data or statistics packet\n";
297 }
298 }
299
300 # we'll now return from this sub and reconnect
301 print STDERR "Data channel socket gave no data, bailing out...\n";
302 }
303
304 #
305 # sub to update an rrd file
306 #
307 # $machine = name of the machine
308 # (eg. kernow.ukc.ac.uk)
309 # $type = the type of graph for the machine
310 # (eg. cpu)
311 # $date = the date of the item we want to add
312 # (in seconds since the epoch)
313 # $step = the interval at which the database steps
314 # used when we create a new rrd
315 # $xmlref = reference to the xml data packet
316 # @data = array containing data items to add
317 # (eg. "packet.cpu.user:user:GAUGE")
318 #
319 sub updaterrd() {
320 my($machine, $type, $date, $step, $xmlref, @data) = @_;
321 # get hold of the xmlhash we have a reference to
322 my(%xmlhash) = %$xmlref;
323 # check if we need to create a new rrd
324 if( ! -f "$rrddir/$machine/$type.rrd") {
325 my(@createdata);
326 # pull the details out of the data we've been given
327 foreach my $dataitem (@data) {
328 if($dataitem =~ /^\S+:(\S+):(\S+)$/) {
329 push @createdata, "$1:$2";
330 }
331 }
332 # call the &makerrd to actually create the rrd
333 print "making new rrd for $rrddir/$machine/$type.rrd\n";
334 &makerrd($machine, $type, $date, $step, @createdata);
335 }
336 # get the details out of the data we've been given
337 my($updateparams) = "$date";
338 foreach my $dataitem (@data) {
339 if($dataitem =~ /^(\S+):\S+:\S+$/) {
340 # pull the values straight out of the xmlhash
341 my($value) = $xmlhash{$1};
342 # if it's undefined we'll set it to 0
343 # this probably shouldn't happen, but it'd be best to handle it "nicely" :)
344 $value = "0" if not defined $value;
345 $updateparams .= ":$value";
346 }
347 }
348 # perform the update
349 RRDs::update ("$rrddir/$machine/$type.rrd", $updateparams);
350 my($err) = RRDs::error;
351 print STDERR "Error updating $rrddir/$machine/$type.rrd: $err\n" if $err;
352 }
353
354 #
355 # sub to create a new rrd file
356 #
357 # $machine = name of the machine
358 # (eg. kernow.ukc.ac.uk)
359 # $type = the type of graph for the machine
360 # (eg. cpu)
361 # $start = the date of the first item we want to add
362 # (in seconds since the epoch)
363 # $step = the interval at which the database steps
364 # @data = the data items we want to put in the rrd
365 # in the form: $dsname:dstype
366 # (eg. "size:GAUGE")
367 #
368 sub makerrd() {
369 my($machine, $type, $start, $step, @data) = @_;
370 # check if directory exists for rrd
371 if(! -d "$rrddir/$machine") {
372 # not sure on this umask, but it seems to work?
373 mkdir "$rrddir/$machine", 0777;
374 }
375 my(@rrdcmd);
376 # we'll want to add our first data item at $start,
377 # so we start our rrd $step before that.
378 $start -= $step;
379 push @rrdcmd, "$rrddir/$machine/$type.rrd";
380 push @rrdcmd, "--start=$start";
381 push @rrdcmd, "--step=$step";
382 foreach my $dataitem (@data) {
383 # dataitem should be: "dsname:dstype"
384 if($dataitem =~ /^(\S+):(\S+)$/) {
385 push @rrdcmd, "DS:$1:$2:600:U:U";
386 }
387 }
388 push @rrdcmd, (
389 # 3h in 15s samples
390 "RRA:AVERAGE:0.5:1:720",
391 "RRA:MAX:0.5:1:720",
392 # 1d in 2m samples
393 "RRA:AVERAGE:0.5:8:720",
394 "RRA:MAX:0.5:8:720",
395 # 1w in 15m samples
396 "RRA:AVERAGE:0.5:60:672",
397 "RRA:MAX:0.5:60:672",
398 # 1m in 1hr samples
399 "RRA:AVERAGE:0.5:240:744",
400 "RRA:MAX:0.5:240:744",
401 # 1y in 12hr samples
402 "RRA:AVERAGE:0.5:2880:730",
403 "RRA:MAX:0.5:2880:730",
404 );
405 RRDs::create (@rrdcmd);
406 my($err) = RRDs::error;
407 print STDERR "Error creating rrd for $rrddir/$machine/$type: $err\n" if $err;
408 }