Back to home page

sPhenix code displayed by LXR

 
 

    


File indexing completed on 2025-08-03 08:20:57

0001 #!/usr/bin/perl -w
0002 
0003 # Script to monitor the status of the online monitoring servers, and
0004 # to start/restart/stop them, individually or all at once.
0005 #
0006 # Normal operation is checking every $interval milliseconds (default = 5 min)
0007 #
0008 # Left-click on a monitor name button triggers an immediate check.
0009 # Right-click prints the machine:port and pid of that monitor in the shell
0010 # window the script was started from.
0011 #
0012 # L. Aphecetche (aphecetc@in2p3.fr)
0013 
0014 use Tk;
0015 use Tk::FileSelect;
0016 use Time::localtime;
0017 use DirHandle;
0018 use Tk::Image;
0019 use File::Basename;
0020 
0021 use strict;
0022 
# Delay between two automatic checks, in milliseconds (5 minutes).
# Do not check too often: one pass over all the servers takes some time
# (about 15 seconds at least).
my $interval = 5 * 60 * 1000;

# One record per monitor, keyed by monitor name. Each record holds the
# "machine", "number" and "pid" fields plus the Tk widgets of that monitor.
my %monitors;

# One record per subsystem: the list of its monitors plus its Tk widgets.
my %subsysmonitors;

# Handles of the global Stop/Start/Restart buttons; assigned while the GUI
# is built.
my ( $stopall, $startall, $restartall );

# Monitors that are never added to a subsystem group.
my %excludeall = map { $_ => 1 } qw(bbc ll1 localpol sepd spin tpot zdc);
0045 #_____________________________________________________________________________
# Print on stdout where the given monitor is supposed to run
# (machine:number) together with the pid currently recorded for it.
#
#   $name - monitor name, a key of %monitors
sub info {
    my ($name) = @_;
    printf "%s monitor is supposed to run on %s:%s pid=%s\n",
        $name,
        $monitors{$name}->{"machine"},
        $monitors{$name}->{"number"},
        $monitors{$name}->{"pid"};
}
0052 
0053 #_____________________________________________________________________________
# Run one shell command on behalf of send_monitor_command and report on
# stdout when it could not be started or ended with a non-zero status.
# Returns the value returned by system().
sub _run_monitor_shell_command {
    my ($command) = @_;

    my $status = system($command);
    if ( $status == -1 ) {
        print "send_monitor_command : could not run '$command' : $!\n";
    }
    elsif ( $status != 0 ) {
        print "send_monitor_command : '$command' returned wait status $status\n";
    }
    return $status;
}

# Send a start/stop/restart command to one monitor server, or to the whole
# pool when the monitor name is "all", then refresh the displayed status.
#
#   $m     - monitor name (a key of %monitors) or the string "all"
#   $swhat - command, case-insensitive: "start", "stop" or "restart"
#
# An unknown command or an unknown monitor name is reported on stdout and
# nothing is executed. Failures of the shell commands are reported but do
# not stop the status refresh.
sub send_monitor_command {
    my ( $m, $swhat ) = @_;
    my $what = lc($swhat);

    if ( $what ne "start" && $what ne "stop" && $what ne "restart" ) {
        print "send_monitor_command : command $swhat unknown ! Should be start, restart or stop !\n";
        return;
    }

    my $cmd;

    if ( $m eq "all" ) {
        $cmd = "$ENV{'ONLMON_RUNDIR'}/submit_all.sh $what";

        # Prepare the pool before submitting, then wait before going on.
        if ( $what eq "restart" ) {
            _run_monitor_shell_command("restart_monitor_pool.sh");
            sleep 5;
        }
        elsif ( $what eq "stop" ) {
            _run_monitor_shell_command("ssh -x etpool /etc/stop_et.sh");
            sleep 20;
        }
        elsif ( $what eq "start" ) {
            _run_monitor_shell_command("ssh -x etpool /etc/start_et.csh");
            sleep 10;
        }
    }
    else {
        # Do not autovivify an entry for a name we know nothing about: the
        # status refresh below would fail on its missing buttons.
        if ( !exists $monitors{$m} ) {
            print "send_monitor_command : monitor $m unknown !\n";
            return;
        }
        my $va     = $monitors{$m}->{"machine"};
        my $number = $monitors{$m}->{"number"};

        # NOTE(review): checkMonitor connects as phnxrc@<machine> while this
        # command uses the current user - confirm this is intended.
        $cmd = "ssh -x $va \$ONLMON_SERVERWATCHER/monserver.sh $what $number";
    }

    print "send_monitor_command : $cmd\n";
    _run_monitor_shell_command($cmd);
    update($m);

    if ( $m eq "all" ) {
        # After a global stop only "Start All" makes sense, and vice versa.
        my $stopped = ( $what eq "stop" );
        $stopall->configure( -state => ( $stopped ? "disabled" : "active" ) );
        $startall->configure( -state => ( $stopped ? "active" : "disabled" ) );
        $restartall->configure( -state => ( $stopped ? "disabled" : "active" ) );
    }
}
0101 
0102 
0103 
# Send a start/stop/restart command to every monitor of one subsystem, in
# sorted name order, by calling send_monitor_command for each of them.
#
#   $m     - subsystem name, a key of %subsysmonitors
#   $swhat - command, case-insensitive: "start", "stop" or "restart"
#
# An unknown command or a subsystem without a monitor list is reported on
# stdout and nothing is sent.
sub send_subsys_monitor_command {
    my ( $m, $swhat ) = @_;
    my $what = lc($swhat);

    if ( $what ne "start" && $what ne "stop" && $what ne "restart" ) {
        print "send_subsys_monitor_command : command $swhat unknown ! Should be start, restart or stop !\n";
        return;
    }

    # Dereferencing a missing list would die under strict refs.
    if ( !exists $subsysmonitors{$m}
        || !defined $subsysmonitors{$m}->{"monitors"} )
    {
        print "send_subsys_monitor_command : subsystem $m has no monitors !\n";
        return;
    }

    foreach my $moni ( sort @{ $subsysmonitors{$m}->{"monitors"} } ) {
        print "send_subsys_monitor_command : $moni $swhat\n";
        send_monitor_command( $moni, $swhat );
    }
}
0123 
# Send a start/stop/restart command to every known monitor, one after the
# other in sorted name order, by calling send_monitor_command for each.
#
#   $swhat - command, case-insensitive: "start", "stop" or "restart"
#
# An unknown command is reported on stdout and nothing is sent.
sub send_monitor_command_to_all {
    my ($swhat) = @_;
    my $what = lc($swhat);

    if ( $what ne "start" && $what ne "stop" && $what ne "restart" ) {
        print "send_monitor_command_to_all : command $swhat unknown ! Should be start, restart or stop !\n";
        return;
    }

    foreach my $moni ( sort keys %monitors ) {
        send_monitor_command( $moni, $swhat );
    }
}
0140 #_____________________________________________________________________________
# Query the status of one monitor server over ssh and update its buttons
# and recorded pid accordingly. With the name "all", every monitor of
# %monitors is checked in sorted name order.
#
#   $m - monitor name (a key of %monitors) or the string "all"
#
# The server counts as running when the status command prints exactly two
# lines and the second one contains the machine name; the first
# whitespace-separated field of that line is stored as the pid. Otherwise
# the monitor is shown as down and its recorded pid is reset to 0.
sub checkMonitor {
    my ($m) = @_;

    if ( $m eq "all" ) {
        foreach my $monitor ( sort keys %monitors ) {
            checkMonitor($monitor);
        }
        return;
    }

    # Without a record there are no buttons to update.
    if ( !exists $monitors{$m} ) {
        print "checkMonitor : monitor $m unknown !\n";
        return;
    }

    my $entry  = $monitors{$m};
    my $va     = $entry->{"machine"};
    my $number = $entry->{"number"};

    # \$ONLMON_SERVERWATCHER is left for the shell to expand.
    my $cmd = "ssh -x phnxrc\@$va \$ONLMON_SERVERWATCHER/monserver.sh status $number";

    print ctime() . " Checking $m on $va ...";

    my @result;
    if ( open( my $status, '-|', $cmd ) ) {
        while ( my $line = <$status> ) {
            push @result, $line;
            print "result: $line";
        }
        close $status;
    }
    else {
        print " cannot run status command : $!";
    }

    # \Q..\E: the machine name is matched literally, not as a pattern.
    my $pid;
    if ( @result == 2 && $result[1] =~ /\Q$va\E/ ) {
        ($pid) = split " ", $result[1];
    }
    my $running = defined $pid;

    # Do not keep the pid of a server that is no longer there.
    $entry->{"pid"} = $running ? $pid : 0;

    $entry->{"nameButton"}->configure( -background => ( $running ? 'green' : 'red' ) );
    $entry->{"startButton"}->configure( -state => ( $running ? 'disabled' : 'active' ) );
    $entry->{"restartButton"}->configure( -state => 'active' );
    $entry->{"stopButton"}->configure( -state => ( $running ? 'active' : 'disabled' ) );

    print " " . ctime() . " done\n";
}
0194 
0195 #_____________________________________________________________________________
# Scan the directory named by $ENV{ONLMON_SERVERWATCHER} and register the
# monitors described by every command file found there, using decode_new.
#
# A command file is a directory entry whose name contains "monitorserver"
# and "cmd" and no "~" (which skips editor backup files). A missing
# environment variable or an unreadable directory is reported on stdout
# and leaves the monitor tables untouched.
sub getMonitors {
    my $dir = $ENV{'ONLMON_SERVERWATCHER'};
    if ( !defined $dir ) {
        print "getMonitors : ONLMON_SERVERWATCHER is not set !\n";
        return;
    }

    my $d = DirHandle->new($dir);
    if ( !defined $d ) {
        print "getMonitors : cannot read directory $dir : $!\n";
        return;
    }

    while ( defined( my $entry = $d->read ) ) {
        next
            unless $entry =~ /monitorserver/
            && $entry =~ /cmd/
            && $entry !~ /~/;
        decode_new("$dir/$entry");
    }
    $d->close;
    return;
}
0211 
0212 #_____________________________________________________________________________
# Register every monitor declared in one command file.
#
#   $cmdfile - path of the command file; its base name is split on "." and
#              the first field is used as the machine, the third as the
#              number
#
# Each line that starts with "//" and contains "monitor:" declares one
# monitor: the text after "monitor:", with surrounding whitespace removed,
# is the monitor name. The monitor is stored in %monitors with its
# machine, number and a pid of 0. Unless its name is listed in
# %excludeall it is also appended to the monitor list of its subsystem in
# %subsysmonitors, the subsystem being the part of the name before the
# first space.
#
# Dies when the file cannot be opened.
sub decode_new {
    my ($cmdfile) = @_;

    my ( $va, undef, $number ) = split /\./, basename($cmdfile);

    open( my $fh, '<', $cmdfile ) or die "Cannot open $cmdfile : $!";

    while ( my $line = <$fh> ) {
        chomp $line;

        # Take the first "monitor:" on the line; tolerate any amount of
        # whitespace after it and trailing blanks or a carriage return.
        next unless $line =~ m{^//.*?monitor:\s*(.*?)\s*$};
        my $monitor = $1;
        next if $monitor eq "";

        $monitors{$monitor} = {
            "machine" => $va,
            "number"  => $number,
            "pid"     => 0,
        };

        if ( !exists $excludeall{$monitor} ) {
            my ($subsystem) = split / /, $monitor;
            push @{ $subsysmonitors{$subsystem}->{"monitors"} }, $monitor;
        }
    }

    close $fh;
    return;
}
0255 
0256 #_____________________________________________________________________________
# Extract machine, monitor name and number from one command file.
#
#   $cmdfile - path of the command file; its base name is split on "." and
#              the first field is the machine, the third the number
#
# The monitor name is read from the first line of the file: it is the text
# between the first "run_" and the next ".". It is undef when the file is
# empty or its first line contains no "run_".
#
# Returns the list ($machine, $monitor, $number).
# Dies when the file cannot be opened.
sub decode {
    my ($cmdfile) = @_;

    # Split the base name only, so that the directory part (and any dot in
    # it) does not end up in the machine name.
    my ( $va, undef, $number ) = split /\./, basename($cmdfile);

    open( my $fh, '<', $cmdfile ) or die "Cannot open $cmdfile : $!";
    my $line = <$fh>;
    close $fh;

    my $monitor;
    if ( defined $line ) {
        my $pos = index( $line, "run_" );
        if ( $pos >= 0 ) {
            ($monitor) = split /\./, substr( $line, $pos + length("run_") );
        }
    }

    return ( $va, $monitor, $number );
}
0282 
0283 #_____________________________________________________________________________
0284 #_____________________________________________________________________________
0285 #_____________________________________________________________________________
0286 
# Build the monitor tables from the command files, then the GUI.
getMonitors();

my $mw = MainWindow->new;

# Grid cursor: $column/$row is the top-left cell of the next button group.
# A group is two grid columns wide and three grid rows high, and
# $ncolumns groups fit on one line.
my $column   = 0;
my $row      = 0;
my $ncolumns = 10;

# Move the grid cursor to the next group, wrapping to a new line of groups
# once $ncolumns of them have been placed.
my $next_group = sub {
    $column += 2;
    if ( $column / 2 >= $ncolumns ) {
        $column = 0;
        $row += 3;
    }
};

# Action buttons of one monitor: action, label, row offset inside the
# group and the options set in addition to the command.
my @monitor_actions = (
    [ "start",   "Start",   0, [ -state => 'disabled' ] ],
    [ "restart", "Restart", 1, [ -state => 'active' ] ],
    [ "stop",    "Stop",    2, [] ],
);

foreach my $m ( sort keys %monitors ) {
    my $entry = $monitors{$m};

    # Name button: left click checks the monitor now, right click prints
    # where it runs. It stays black until the first check colours it.
    my $name = $entry->{"nameButton"} =
        $mw->Button( -text => uc($m), -relief => 'flat', -width => 10 );
    $name->bind( '<Button-3>' => sub { info($m); } );
    $name->bind( '<Button-1>' => sub { checkMonitor($m); } );
    $name->grid( -column => $column, -row => $row );
    $name->configure( -background => 'black' );

    my $machine = $entry->{"machineLabel"} =
        $mw->Label( -text => $entry->{"machine"} );
    $machine->grid( -column => $column, -row => $row + 1 );

    foreach my $spec (@monitor_actions) {
        my ( $action, $label, $offset, $options ) = @$spec;
        my $button = $entry->{ $action . "Button" } =
            $mw->Button( -text => $label, -relief => "raised", -width => 5 );
        $button->grid( -column => $column + 1, -row => $row + $offset );
        $button->configure( @$options,
            -command => sub { send_monitor_command( $m, $action ) } );
    }

    $next_group->();
}

# Action buttons of one subsystem group; they are always enabled.
my @subsys_actions = (
    [ "start",   "Start All",   0 ],
    [ "restart", "Restart All", 1 ],
    [ "stop",    "Stop All",    2 ],
);

foreach my $m ( sort keys %subsysmonitors ) {
    my $entry = $subsysmonitors{$m};

    my $button_label = sprintf( "All %s", $m );
    my $name = $entry->{"nameButton"} =
        $mw->Button( -text => uc($button_label), -relief => 'flat', -width => 10 );
    $name->grid( -column => $column, -row => $row );
    $name->configure( -background => 'cyan' );

    foreach my $spec (@subsys_actions) {
        my ( $action, $label, $offset ) = @$spec;
        my $button = $entry->{ $action . "Button" } =
            $mw->Button( -text => $label, -relief => "raised", -width => 5 );
        $button->grid( -column => $column + 1, -row => $row + $offset );
        $button->configure( -state => 'active',
            -command => sub { send_subsys_monitor_command( $m, $action ) } );
    }

    $next_group->();
}

# The global buttons occupy the last group of the current line; derive the
# column from $ncolumns so that it follows a change of the layout width.
$column = 2 * $ncolumns - 2;
my $allbutton = $mw->Button( -text => "Seriously ALL", -relief => 'flat', -width => 10 );
$allbutton->grid( -column => $column, -row => $row );
$allbutton->configure( -background => 'orange' );

# Create one global action button on the given grid row.
my $make_all_button = sub {
    my ( $label, $action, $grid_row ) = @_;
    my $button = $mw->Button( -text => $label,
        -command => sub { send_monitor_command_to_all($action) } );
    $button->grid( -column => $column + 1, -row => $grid_row );
    $button->configure( -state => 'active', -width => 7 );
    return $button;
};

$stopall    = $make_all_button->( 'Stop All',    "stop",    $row );
$startall   = $make_all_button->( 'Start All',   "start",   $row + 1 );
$restartall = $make_all_button->( 'Restart All', "restart", $row + 2 );
$row += 2;

# Bottom line: logo, quit button and the time of the last check.
my $logoimg = $mw->Photo( 'logo',
    -file => "$ENV{'ONLMON_SERVERWATCHER'}/sphenix-logo_transparent_small.gif" );
my $logo = $mw->Label( '-image' => 'logo' );
$row += $ncolumns;
$logo->grid( -column => 0, -row => $row, -columnspan => 2 );

my $quitButton = $mw->Label( -text => "Quit", -relief => 'raised' );
$quitButton->grid( -column => 2, -row => $row );
$quitButton->bind( '<Button-1>' => sub { $mw->destroy } );
$quitButton->configure( -width => 5 );

my $date = $mw->Label( -text => "", -relief => 'sunk' );
$date->grid(
    -column     => ( 2 * $ncolumns - 2 ),
    -row        => $row,
    -columnspan => 2,
    -sticky     => 'ew'
);

# Periodic check of all monitors every $interval milliseconds.
$mw->repeat( $interval, \&update );

print "\n\n";

# First check right away. The sub is defined after this point, hence the
# explicit & call form.
&update("all");

MainLoop;
0422 
0423 #_____________________________________________________________________________
# Check one monitor, or all of them, and refresh the "Last check" label.
#
#   $m - optional monitor name or "all"; when it is not given (as in the
#        periodic timer callback) all monitors are checked
#
# Declared without a prototype: the sub does take an argument, and it is
# called before this definition is seen.
sub update {
    my ($m) = @_;

    checkMonitor( defined($m) ? $m : "all" );
    $date->configure( -text => "Last check : " . ctime() );
}