File indexing completed on 2025-08-03 08:20:57
0001
0002
0003
0004
0005
0006
0007
0008
0009
0010
0011
0012
0013
0014 use Tk;
0015 use Tk::FileSelect;
0016 use Time::localtime;
0017 use DirHandle;
0018 use Tk::Image;
0019 use File::Basename;
0020
0021 use strict;
0022
0023
0024
0025
# Period of the automatic status check. The value is handed to
# $mw->repeat further down, which interprets it in milliseconds.
my $interval = 300_000;

# One record per monitor, keyed by monitor name. decode_new() fills in
# "machine", "number" and "pid"; the GUI code adds the widget handles.
my %monitors;

# Global start/restart/stop buttons. They are created when the GUI is
# built and are reconfigured by send_monitor_command() in its "all" branch.
my ( $startall, $restartall, $stopall );

# One record per subsystem group, keyed by the first word of the monitor
# name; each record carries a "monitors" list plus the group's widgets.
my %subsysmonitors;

# Monitors that decode_new() must not add to any subsystem group.
my %excludeall = map { $_ => 1 } qw(
    bbc
    ll1
    localpol
    sepd
    spin
    tpot
    zdc
);
0045
# Print where a monitor is expected to run: machine, server number and
# the pid recorded for it in %monitors.
#
#   $name - monitor name, used as key into %monitors
sub info {
    my ($name) = @_;

    my $machine = $monitors{$name}->{"machine"};
    my $number  = $monitors{$name}->{"number"};
    my $pid     = $monitors{$name}->{"pid"};

    print "$name monitor is supposed to run on ", $machine,
        ":", $number, " pid=",
        $pid, "\n";
}
0052
0053
# Send a start, stop or restart request to one monitor server, or to the
# whole pool when the monitor name is the literal "all", and then refresh
# the status display through update().
#
# Arguments:
#   $m     - a key of %monitors, or "all"
#   $swhat - "start", "stop" or "restart" (compared case-insensitively)
#
# Nothing useful is returned. An unknown command or an unknown monitor
# name is reported on STDOUT and the call returns without running
# anything. Commands that exit with a non-zero status are reported on
# STDOUT as well; the status refresh still takes place.
sub send_monitor_command {

    my ( $m, $swhat ) = @_;
    my $what = lc($swhat);

    if ( $what ne "start" && $what ne "stop" && $what ne "restart" ) {
        print "send_monitor_command : command $swhat unknown ! Should be start, restart or stop !\n";
        return;
    }

    my $cmd;

    if ( $m eq "all" ) {
        # Preparation command for the pool and the pause, in seconds,
        # observed before submit_all.sh is run.
        my %prepare = (
            "restart" => [ "restart_monitor_pool.sh",         5 ],
            "stop"    => [ "ssh -x etpool /etc/stop_et.sh",   20 ],
            "start"   => [ "ssh -x etpool /etc/start_et.csh", 10 ],
        );
        my ( $precmd, $pause ) = @{ $prepare{$what} };

        if ( system($precmd) != 0 ) {
            print "send_monitor_command : $precmd failed (status $?)\n";
        }
        sleep $pause;

        $cmd = "$ENV{'ONLMON_RUNDIR'}/submit_all.sh $what";
    }
    else {
        # Without this guard the lookups below would autovivify an empty
        # record, a malformed ssh command would be run, and the status
        # refresh would die on the record's missing widgets.
        if ( !exists $monitors{$m} ) {
            print "send_monitor_command : monitor $m unknown !\n";
            return;
        }

        my $va     = $monitors{$m}->{"machine"};
        my $number = $monitors{$m}->{"number"};

        $cmd = "ssh -x $va \$ONLMON_SERVERWATCHER/monserver.sh $what $number";
    }

    print "send_monitor_command : $cmd\n";
    if ( system($cmd) != 0 ) {
        print "send_monitor_command : command failed (status $?)\n";
    }
    update($m);

    if ( $m eq "all" ) {
        # After a stop only "Start All" stays usable; after a start or a
        # restart it is the other way round.
        my $stopped = ( $what eq "stop" );
        $stopall->configure( -state => ( $stopped ? "disabled" : "active" ) );
        $startall->configure( -state => ( $stopped ? "active" : "disabled" ) );
        $restartall->configure( -state => ( $stopped ? "disabled" : "active" ) );
    }

    return;
}
0101
0102
0103
# Forward one command to every monitor listed for a subsystem group.
#
#   $m     - key of %subsysmonitors
#   $swhat - "start", "stop" or "restart" (compared case-insensitively)
#
# The group's monitors are processed in sorted order, each through
# send_monitor_command(). An unknown command is reported and ignored.
sub send_subsys_monitor_command {

    my ( $m, $swhat ) = @_;
    my $what = lc($swhat);

    my %known = map { $_ => 1 } ( "start", "stop", "restart" );
    unless ( $known{$what} ) {
        print "send_monitor_command : command $swhat unknown ! Should be start, restart or stop !\n";
        return;
    }

    my $members = $subsysmonitors{$m}->{"monitors"};
    for my $moni ( sort @{$members} ) {
        print "send_monitor_command : $moni $swhat\n";
        send_monitor_command( $moni, $swhat );
    }
}
0123
# Send one command to every monitor in %monitors, one monitor at a time
# and in sorted name order, through send_monitor_command().
#
#   $swhat - "start", "stop" or "restart" (compared case-insensitively)
#
# An unknown command is reported and nothing is sent.
sub send_monitor_command_to_all {

    my ($swhat) = @_;
    my $what = lc($swhat);

    my %known = map { $_ => 1 } ( "start", "stop", "restart" );
    unless ( $known{$what} ) {
        print "send_monitor_command : command $swhat unknown ! Should be start, restart or stop !\n";
        return;
    }

    my @names = sort keys %monitors;
    for my $moni (@names) {
        send_monitor_command( $moni, $swhat );
    }

}
0140
# Query the status of one monitor server over ssh and colour its widgets
# accordingly; with the name "all" every entry of %monitors is checked in
# sorted order.
#
#   $m - a key of %monitors, or the literal "all"
#
# The monitor counts as running when the status command prints exactly
# two lines and the second one contains the machine name; the first
# whitespace-separated field of that line is then stored as the pid.
# In every other case (including a status command that could not be
# started) the monitor is shown as down.
sub checkMonitor {

    my $m = shift;

    if ( $m eq "all" ) {
        foreach my $monitor ( sort keys %monitors ) {
            checkMonitor($monitor);
        }
        return;
    }

    my $va     = $monitors{$m}->{"machine"};
    my $number = $monitors{$m}->{"number"};

    my $cmd = "ssh -x phnxrc\@$va \$ONLMON_SERVERWATCHER/monserver.sh status $number";

    print ctime() . " Checking $m on $va ...";

    # Lexical handle and explicit pipe mode; a failure to start the
    # command leaves @result empty, which is treated as "down" below.
    my @result;
    if ( open( my $status, '-|', $cmd ) ) {
        while ( my $line = <$status> ) {
            push @result, $line;
            print "result: $line";
        }
        close $status;
    }
    else {
        print " cannot run status command : $!";
    }

    # \Q..\E keeps dots and dashes in the host name literal.
    my $running = 0;
    if ( scalar @result == 2 && $result[1] =~ /\Q$va\E/ ) {
        my @s = split " ", $result[1];
        $monitors{$m}->{"pid"} = $s[0];
        $running = 1;
    }

    # Restart is always offered; start and stop are mutually exclusive.
    $monitors{$m}->{"nameButton"}->configure( -background => ( $running ? 'green' : 'red' ) );
    $monitors{$m}->{"startButton"}->configure( -state => ( $running ? 'disabled' : 'active' ) );
    $monitors{$m}->{"restartButton"}->configure( -state => 'active' );
    $monitors{$m}->{"stopButton"}->configure( -state => ( $running ? 'active' : 'disabled' ) );

    print " " . ctime() . " done\n";
}
0194
0195
# Scan the directory named by $ENV{ONLMON_SERVERWATCHER} and register the
# monitors described by its command files.
#
# A directory entry is treated as a command file when its name contains
# both "monitorserver" and "cmd" and no "~" (which skips editor backup
# copies). Each such file is handed to decode_new(), which fills
# %monitors and %subsysmonitors.
#
# Takes no arguments and returns nothing. A directory that cannot be
# read is reported on STDERR and leaves the tables untouched.
sub getMonitors {

    my $dir = defined $ENV{'ONLMON_SERVERWATCHER'} ? $ENV{'ONLMON_SERVERWATCHER'} : "";

    my $d = DirHandle->new($dir);
    if ( !defined $d ) {
        warn "getMonitors : cannot read directory '$dir' : $!\n";
        return;
    }

    while ( defined( my $entry = $d->read ) ) {
        next unless $entry =~ /monitorserver/ && $entry =~ /cmd/ && $entry !~ /~/;
        decode_new("$dir/$entry");
    }
    undef $d;

    return;
}
0211
0212
# Read one command file and register every monitor it declares.
#
#   $cmdfile - path of the command file
#
# The base name of the file is split on dots; field 0 is taken as the
# machine and field 2 as the server number. Inside the file, every line
# that starts with "//" and contains "monitor:" declares one monitor: the
# text after "monitor:", with surrounding whitespace removed, is the
# monitor name. Lines with nothing after the tag are ignored.
#
# For each monitor a fresh record (machine, number, pid 0) is stored in
# %monitors. Unless the name is listed in %excludeall it is also appended
# to the "monitors" list of the subsystem named by the first
# space-separated word of the monitor name.
#
# Dies when the file cannot be opened.
sub decode_new {

    my $cmdfile = shift;

    my @parts  = split /\./, basename($cmdfile);
    my $va     = $parts[0];
    my $number = $parts[2];

    open( my $fh, '<', $cmdfile ) or die "Cannot open $cmdfile : $!";

    while ( my $line = <$fh> ) {
        chomp $line;
        next unless $line =~ m{^//} && $line =~ /monitor:/;

        # Tolerate any amount of blank space after the tag and drop
        # trailing blanks or a stray carriage return.
        my ($monitor) = $line =~ /monitor:\s*(.*?)\s*$/;
        next unless defined $monitor && length $monitor;

        $monitors{$monitor} = {
            "machine" => $va,
            "number"  => $number,
            "pid"     => 0,
        };

        next if exists $excludeall{$monitor};

        my ($subsystem) = split / /, $monitor;
        push @{ $subsysmonitors{$subsystem}->{"monitors"} }, $monitor;
    }

    close $fh;

    return;
}
0255
0256
# Extract machine, monitor name and server number from a command file.
#
#   $cmdfile - path of the command file
#
# Returns the list ($va, $monitor, $number):
#   $va, $number - fields 0 and 2 of the file's base name split on dots
#                  (the directory part of the path is ignored, as in
#                  decode_new)
#   $monitor     - on the first line of the file, the text between the
#                  first "run_" and the next dot; undef when the file is
#                  empty or its first line has no "run_"
#
# Dies when the file cannot be opened.
sub decode {

    my $cmdfile = shift;

    my @parts  = split /\./, basename($cmdfile);
    my $va     = $parts[0];
    my $number = $parts[2];

    open( my $fh, '<', $cmdfile ) or die "Cannot open $cmdfile : $!";
    my $line = <$fh>;
    close $fh;

    my $monitor;
    if ( defined $line ) {
        chomp $line;
        my $pos = index( $line, "run_" );
        if ( $pos >= 0 ) {
            ($monitor) = split /\./, substr( $line, $pos + length("run_") );
        }
    }

    return ( $va, $monitor, $number );
}
0282
0283
0284
0285
0286
# Fill %monitors (and %subsysmonitors) before any widget is created.
getMonitors();

my $mw       = MainWindow->new;
my $column   = 0;
my $row      = 0;
my $ncolumns = 10;

# One cell per monitor. A cell is two grid columns wide and three rows
# high: name button over machine label on the left, Start / Restart /
# Stop stacked on the right. Cells wrap after $ncolumns per line.
foreach my $m ( sort keys %monitors ) {
    my $entry = $monitors{$m};

    # Name button: <Button-3> prints the record, <Button-1> re-checks.
    my $name = $mw->Button(
        -text   => uc($m),
        -relief => 'flat',
        -width  => 10,
    );
    $entry->{"nameButton"} = $name;
    $name->bind( '<Button-3>' => sub { info($m); } );
    $name->bind( '<Button-1>' => sub { checkMonitor($m); } );
    $name->grid( -column => $column, -row => $row );
    $name->configure( -background => 'black' );

    my $machine = $mw->Label( -text => $entry->{"machine"} );
    $entry->{"machineLabel"} = $machine;
    $machine->grid( -column => $column, -row => $row + 1 );

    # Record key, label, command word, row offset, initial options.
    my @specs = (
        [ "startButton",   "Start",   "start",   0, [ -state => 'disabled' ] ],
        [ "restartButton", "Restart", "restart", 1, [ -state => 'active' ] ],
        [ "stopButton",    "Stop",    "stop",    2, [] ],
    );
    foreach my $spec (@specs) {
        my ( $key, $label, $action, $offset, $initial ) = @{$spec};

        my $button = $mw->Button(
            -text   => $label,
            -relief => "raised",
            -width  => 5,
        );
        $entry->{$key} = $button;
        $button->grid( -column => $column + 1, -row => $row + $offset );
        $button->configure( @{$initial},
            -command => sub { send_monitor_command( $m, $action ) } );
    }

    # Advance to the next cell, wrapping to a new line of cells when full.
    $column += 2;
    if ( $column >= 2 * $ncolumns ) {
        $column = 0;
        $row += 3;
    }
}
0336
# One cell per subsystem group, continuing in the grid where the
# per-monitor cells stopped: a cyan "ALL <group>" button with Start All /
# Restart All / Stop All stacked beside it.
foreach my $m ( sort keys %subsysmonitors ) {
    my $group = $subsysmonitors{$m};

    my $name = $mw->Button(
        -text   => uc("All $m"),
        -relief => 'flat',
        -width  => 10,
    );
    $group->{"nameButton"} = $name;
    $name->grid( -column => $column, -row => $row );
    $name->configure( -background => 'cyan' );

    # Record key, label, command word, row offset.
    my @specs = (
        [ "startButton",   "Start All",   "start",   0 ],
        [ "restartButton", "Restart All", "restart", 1 ],
        [ "stopButton",    "Stop All",    "stop",    2 ],
    );
    foreach my $spec (@specs) {
        my ( $key, $label, $action, $offset ) = @{$spec};

        my $button = $mw->Button(
            -text   => $label,
            -relief => "raised",
            -width  => 5,
        );
        $group->{$key} = $button;
        $button->grid( -column => $column + 1, -row => $row + $offset );
        $button->configure(
            -state   => 'active',
            -command => sub { send_subsys_monitor_command( $m, $action ) },
        );
    }

    # Advance to the next cell, wrapping to a new line of cells when full.
    $column += 2;
    if ( $column >= 2 * $ncolumns ) {
        $column = 0;
        $row += 3;
    }
}
0376
0377
# Global controls occupy the last cell of the current line of cells. The
# column is derived from $ncolumns (two grid columns per cell) instead of
# being hard-coded, so it follows any change of the grid width.
$column = 2 * $ncolumns - 2;

my $allbutton = $mw->Button(
    -text   => "Seriously ALL",
    -relief => 'flat',
    -width  => 10,
);
$allbutton->grid( -column => $column, -row => $row );
$allbutton->configure( -background => 'orange' );

# Stop / Start / Restart for every monitor, stacked on consecutive rows.
$stopall = $mw->Button(
    -text    => 'Stop All',
    -command => sub { send_monitor_command_to_all("stop") },
);
$stopall->grid( -column => $column + 1, -row => $row );
$stopall->configure( -state => 'active', -width => 7 );

$row++;
$startall = $mw->Button(
    -text    => 'Start All',
    -command => sub { send_monitor_command_to_all("start") },
);
$startall->grid( -column => $column + 1, -row => $row );
$startall->configure( -state => 'active', -width => 7 );

$row++;
$restartall = $mw->Button(
    -text    => 'Restart All',
    -command => sub { send_monitor_command_to_all("restart") },
);
$restartall->grid( -column => $column + 1, -row => $row );
$restartall->configure( -state => 'active', -width => 7 );
0400
# Footer line: logo on the left, Quit next to it, time of the last check
# on the right. It is placed $ncolumns rows below the global buttons.
my $logoimg = $mw->Photo(
    'logo',
    -file => "$ENV{'ONLMON_SERVERWATCHER'}/sphenix-logo_transparent_small.gif",
);
my $logo = $mw->Label( '-image' => 'logo' );
$row += $ncolumns;
$logo->grid( -column => 0, -row => $row, -columnspan => 2 );

# "Quit" is a raised label that destroys the main window on <Button-1>.
my $quitButton = $mw->Label( -text => "Quit", -relief => 'raised' );
$quitButton->grid( -column => 2, -row => $row );
$quitButton->bind( '<Button-1>' => sub { $mw->destroy } );
$quitButton->configure( -width => 5 );

# Text of this label is rewritten by update() after every check.
my $date = $mw->Label( -text => "", -relief => 'sunk' );
$date->grid(
    -column     => ( 2 * $ncolumns - 2 ),
    -row        => $row,
    -columnspan => 2,
    -sticky     => 'ew',
);

# Periodic re-check of every monitor.
$mw->repeat( $interval, \&update );

print "\n\n";

# Initial check of every monitor before the event loop starts. The & call
# form ignores any prototype on update, so the argument is always accepted.
&update("all");

MainLoop;
0422
0423
# Re-check monitor status and stamp the time of the check on the $date
# label.
#
#   $m - optional: a monitor name or "all". When omitted (as when the sub
#        is invoked through the \&update reference given to $mw->repeat),
#        every monitor is checked.
#
# The sub is declared without a prototype because it accepts an optional
# argument; an empty prototype would reject calls that pass one.
sub update {

    my $m = shift;

    checkMonitor( defined($m) ? $m : "all" );

    $date->configure( -text => "Last check : " . ctime() );
}
0435 }