D
danrumney
Hi all,
I have a script (see below) that is driving me nuts.
It runs as a daemon on a RHEL box and does just about everything
correctly. It's job is to scan a hash of jobs every 15s to see if
there are any new ones that need to be performed.
Each time a new job is found, this script forks off a child to perform
the necessary task.
Prior to forking off the child, it runs a utility called 'cleanXML'
which is designed to remove invalid characters from an XML file.
I use system to call the utility. For some reason, the return code is
'No child processes'. But, the thing is, it has absolutley no problem
forking just a few lines later...
So my question is... what's going on!?! If I think about it, the
return value of -1 means that after the hidden fork() that system()
performs, the parent process couldn't find any child processes, which
means either that the fork() didn't work or that the child died before
the parent process could proceed... is that feasible?
Any other hypotheses would be warmly welcomed
Thanks,
Dan
#!/usr/bin/perl
use strict;
use svcNav::svcView; # Custom module
use svcNav::svcControl; # Custom module
use Data:
umper;
use diagnostics;
sub message(@);
sub error(@);
sub wall(@);
my $access_log = "/var/log/svcObjectBrowserJobs.log";
my $error_log = "/var/log/svcObjectBrowserJobs.error.log";
open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
open STDOUT, ">>$access_log" or die "Can't write to $access_log: $!";
open STDERR, ">>$error_log" or die "Can't write to $error_log: $!";
# Change to root so that unmounting filesystems will be possible
chdir '/';
my $canCleanXML = "no";
my $xmlCleaner = "/root/cleanXML"; #Custom utility that clears
invalid chars from an XML file
# Check to see if we can execute the cleanXML script
if ( -x $xmlCleaner )
{
$canCleanXML = "yes";
}
wall( "objectBrowser daemon started PID:" . $$ );
wall( "XML Cleaning will",
( $canCleanXML eq "yes" ) ? " " : " not ",
"be performed");
#
# In order to reset this daemon, simply put a job of type "reset" in
the queue
# The dameon will reset once there are no more running jobs
my $endLoop = "no";
my $runningJobs = 0;
my $resetJobId = 0;
# Main loop
do
{
# Scan the job queue no more often than every 15s
# The period may be longer, depending on how many jobs get started
in this
# pass through
sleep 15;
my $jobs = svcView::getJobs(); # get a hash of 'jobs'
# Throw out the jobs that have already failed and
# Count the running jobs
$runningJobs = 0;
my $nonFailedJobs = {};
foreach my $jobKey ( keys %$jobs )
{
if ( $jobs->{$jobKey}->{'type'} eq "reset" )
{
if ($endLoop eq "no")
{
wall("Reset request received");
$resetJobId = $jobKey;
$endLoop = "yes";
}
}
elsif ( defined( $jobs->{$jobKey}->{'pid'} ) )
{
$runningJobs++;
}
elsif ( $jobs->{$jobKey}->{'failed'} eq "no" )
{
$nonFailedJobs->{$jobKey} = $jobs->{$jobKey};
}
}
if (($runningJobs > 0) && ($endLoop eq "no"))
{
message("Found $runningJobs running jobs");
}
# Go through the non failed jobs and start parsing them up to a
maximum
# of 6 concurrent processes
foreach my $jobKey ( keys %$nonFailedJobs )
{
# Only start new jobs if the daemon is not going to end
if (( $runningJobs < 6 ) && ($endLoop eq "no"))
{
message( "Trying to start a "
. $jobs->{$jobKey}->{'type'}
. " job on fileId: "
. $nonFailedJobs->{$jobKey}->{'fileId'} );
# If XML Cleaning is enabled, we clean the file first
# I found that if this was done after the fork, it would
fail
# I've moved it here because it's pretty quick
my $cleaningOutcome = "Not performed";
if ( $canCleanXML eq "yes" )
{
my $fileId = $nonFailedJobs->{$jobKey}->{'fileId'};
my $xmlFile = svcView::getFileName($fileId);
message("Cleaning file: $xmlFile");
my @cleanArgs =
( $xmlCleaner, $xmlFile, $xmlFile . ".cleaned" );
# Run the cleaning utility
system(@cleanArgs);
if ( $? == -1 )
{
message("XML Cleaning failed, see error log");
error("Failed to execute XML Cleaner: $!");
$cleaningOutcome = "Failed due to cleaner not
executing";
}
elsif ( $? & 127 )
{
message("XML Cleaning failed, see errror log");
my $errMsg = sprintf(
"child died with signal %d, %s coredump\n",
( $? & 127 ),
( $? & 128 ) ? 'with' : 'without'
);
error($errMsg);
$cleaningOutcome = "Failed due to cleaner
terminating";
}
elsif ( ( $? >> 8 ) != 0 )
{
message("XML Cleaning failed, see errror log");
my $errMsg =
sprintf( "child exited with value %d\n", $? >>
8 );
error($errMsg);
$cleaningOutcome =
"Failed due to cleaner exiting with failure
code";
}
else
{
message("XML Cleaning successful");
$cleaningOutcome = "Cleaning completed
successfully";
`cp $xmlFile.cleaned $xmlFile`;
}
`rm $xmlFile.cleaned`;
}
# Now that the file is clean, we start a process to parse
it
$SIG{CHLD} = 'IGNORE';
my $jobPID = fork();
svcView::forked();
svcControl::forked();
if ( !defined($jobPID) )
{
error("Couldn't fork!");
}
elsif ( $jobPID != 0 )
{
# Daemon's code path
eval
{
wall( "Starting proc. ".$jobPID." for jobKey ".
$jobKey);
svcControl::markJobStarted( $jobKey, $jobPID );
};
if ($@)
{
error( "Daemon failed for some reason: " . $@ );
}
$runningJobs++;
sleep 10;
}
else
{
# Parsing code path
eval
{
my $fileId = $nonFailedJobs->{$jobKey}-
message( "Creating snap for File: $fileId ",
"with tracking number $trackNum ",
"and comment $comment");
my $parseResult = 0;
my $parseMessage = "";
eval
{
$parseResult =
svcControl:
rocessSnapFile( $fileId,
$trackNum,
$comment,
\
$parseMessage );
};
# Check the outcome of the parsing and mark the
job
# appropriately
if ( $@ || $parseResult != 0 )
{
wall( "Job failed: " . $@ . " "
. $parseMessage
. "\nXML Cleaning: "
. $cleaningOutcome );
svcControl::markJobFailed( $jobKey,
$parseResult,
$parseMessage
. " XML Cleaning: "
. $cleaningOutcome );
}
else
{
message("Job finished");
svcControl::markJobDone( $jobKey,$fileId );
}
};
if ($@)
{
error( "Parsing process failed for some reason:
" . $@ );
}
exit;
}
}
}
} while (($endLoop eq "no") || ($runningJobs != 0) );
# If execution gets to here, then the while loop has ended, so the
script is
# about to end
svcControl::markJobDone($resetJobId, "NULL");
wall("Ending daemon");
#
# These subroutines are used to post messages into the log files
# You have to concatenate localtime with a string to get it in human
readable
# format, but you need to print a list so that @_ is read in a list
context
#
sub message(@)
{
print localtime() . ": " , @_ , "\n";
}
sub error(@)
{
print STDERR localtime() . ": " , @_ , "\n";
}
sub wall(@)
{
message(@_);
error(@_);
}
I have a script (see below) that is driving me nuts.
It runs as a daemon on a RHEL box and does just about everything
correctly. It's job is to scan a hash of jobs every 15s to see if
there are any new ones that need to be performed.
Each time a new job is found, this script forks off a child to perform
the necessary task.
Prior to forking off the child, it runs a utility called 'cleanXML'
which is designed to remove invalid characters from an XML file.
I use system to call the utility. For some reason, the return code is
'No child processes'. But, the thing is, it has absolutley no problem
forking just a few lines later...
So my question is... what's going on!?! If I think about it, the
return value of -1 means that after the hidden fork() that system()
performs, the parent process couldn't find any child processes, which
means either that the fork() didn't work or that the child died before
the parent process could proceed... is that feasible?
Any other hypotheses would be warmly welcomed
Thanks,
Dan
#!/usr/bin/perl
use strict;
use svcNav::svcView; # Custom module
use svcNav::svcControl; # Custom module
use Data:
use diagnostics;
sub message(@);
sub error(@);
sub wall(@);
my $access_log = "/var/log/svcObjectBrowserJobs.log";
my $error_log = "/var/log/svcObjectBrowserJobs.error.log";
open STDIN, '/dev/null' or die "Can't read /dev/null: $!";
open STDOUT, ">>$access_log" or die "Can't write to $access_log: $!";
open STDERR, ">>$error_log" or die "Can't write to $error_log: $!";
# Change to root so that unmounting filesystems will be possible
chdir '/';
my $canCleanXML = "no";
my $xmlCleaner = "/root/cleanXML"; #Custom utility that clears
invalid chars from an XML file
# Check to see if we can execute the cleanXML script
if ( -x $xmlCleaner )
{
$canCleanXML = "yes";
}
wall( "objectBrowser daemon started PID:" . $$ );
wall( "XML Cleaning will",
( $canCleanXML eq "yes" ) ? " " : " not ",
"be performed");
#
# In order to reset this daemon, simply put a job of type "reset" in
the queue
# The dameon will reset once there are no more running jobs
my $endLoop = "no";
my $runningJobs = 0;
my $resetJobId = 0;
# Main loop
do
{
# Scan the job queue no more often than every 15s
# The period may be longer, depending on how many jobs get started
in this
# pass through
sleep 15;
my $jobs = svcView::getJobs(); # get a hash of 'jobs'
# Throw out the jobs that have already failed and
# Count the running jobs
$runningJobs = 0;
my $nonFailedJobs = {};
foreach my $jobKey ( keys %$jobs )
{
if ( $jobs->{$jobKey}->{'type'} eq "reset" )
{
if ($endLoop eq "no")
{
wall("Reset request received");
$resetJobId = $jobKey;
$endLoop = "yes";
}
}
elsif ( defined( $jobs->{$jobKey}->{'pid'} ) )
{
$runningJobs++;
}
elsif ( $jobs->{$jobKey}->{'failed'} eq "no" )
{
$nonFailedJobs->{$jobKey} = $jobs->{$jobKey};
}
}
if (($runningJobs > 0) && ($endLoop eq "no"))
{
message("Found $runningJobs running jobs");
}
# Go through the non failed jobs and start parsing them up to a
maximum
# of 6 concurrent processes
foreach my $jobKey ( keys %$nonFailedJobs )
{
# Only start new jobs if the daemon is not going to end
if (( $runningJobs < 6 ) && ($endLoop eq "no"))
{
message( "Trying to start a "
. $jobs->{$jobKey}->{'type'}
. " job on fileId: "
. $nonFailedJobs->{$jobKey}->{'fileId'} );
# If XML Cleaning is enabled, we clean the file first
# I found that if this was done after the fork, it would
fail
# I've moved it here because it's pretty quick
my $cleaningOutcome = "Not performed";
if ( $canCleanXML eq "yes" )
{
my $fileId = $nonFailedJobs->{$jobKey}->{'fileId'};
my $xmlFile = svcView::getFileName($fileId);
message("Cleaning file: $xmlFile");
my @cleanArgs =
( $xmlCleaner, $xmlFile, $xmlFile . ".cleaned" );
# Run the cleaning utility
system(@cleanArgs);
if ( $? == -1 )
{
message("XML Cleaning failed, see error log");
error("Failed to execute XML Cleaner: $!");
$cleaningOutcome = "Failed due to cleaner not
executing";
}
elsif ( $? & 127 )
{
message("XML Cleaning failed, see errror log");
my $errMsg = sprintf(
"child died with signal %d, %s coredump\n",
( $? & 127 ),
( $? & 128 ) ? 'with' : 'without'
);
error($errMsg);
$cleaningOutcome = "Failed due to cleaner
terminating";
}
elsif ( ( $? >> 8 ) != 0 )
{
message("XML Cleaning failed, see errror log");
my $errMsg =
sprintf( "child exited with value %d\n", $? >>
8 );
error($errMsg);
$cleaningOutcome =
"Failed due to cleaner exiting with failure
code";
}
else
{
message("XML Cleaning successful");
$cleaningOutcome = "Cleaning completed
successfully";
`cp $xmlFile.cleaned $xmlFile`;
}
`rm $xmlFile.cleaned`;
}
# Now that the file is clean, we start a process to parse
it
$SIG{CHLD} = 'IGNORE';
my $jobPID = fork();
svcView::forked();
svcControl::forked();
if ( !defined($jobPID) )
{
error("Couldn't fork!");
}
elsif ( $jobPID != 0 )
{
# Daemon's code path
eval
{
wall( "Starting proc. ".$jobPID." for jobKey ".
$jobKey);
svcControl::markJobStarted( $jobKey, $jobPID );
};
if ($@)
{
error( "Daemon failed for some reason: " . $@ );
}
$runningJobs++;
sleep 10;
}
else
{
# Parsing code path
eval
{
my $fileId = $nonFailedJobs->{$jobKey}-
my $trackNum = $nonFailedJobs->{$jobKey}-{'fileId'};
my $comment = $nonFailedJobs->{$jobKey}-{'trackingNum'};
{'comment'};
message( "Creating snap for File: $fileId ",
"with tracking number $trackNum ",
"and comment $comment");
my $parseResult = 0;
my $parseMessage = "";
eval
{
$parseResult =
svcControl:
$trackNum,
$comment,
\
$parseMessage );
};
# Check the outcome of the parsing and mark the
job
# appropriately
if ( $@ || $parseResult != 0 )
{
wall( "Job failed: " . $@ . " "
. $parseMessage
. "\nXML Cleaning: "
. $cleaningOutcome );
svcControl::markJobFailed( $jobKey,
$parseResult,
$parseMessage
. " XML Cleaning: "
. $cleaningOutcome );
}
else
{
message("Job finished");
svcControl::markJobDone( $jobKey,$fileId );
}
};
if ($@)
{
error( "Parsing process failed for some reason:
" . $@ );
}
exit;
}
}
}
} while (($endLoop eq "no") || ($runningJobs != 0) );
# If execution gets to here, then the while loop has ended, so the
script is
# about to end
svcControl::markJobDone($resetJobId, "NULL");
wall("Ending daemon");
#
# These subroutines are used to post messages into the log files
# You have to concatenate localtime with a string to get it in human
readable
# format, but you need to print a list so that @_ is read in a list
context
#
sub message(@)
{
print localtime() . ": " , @_ , "\n";
}
sub error(@)
{
print STDERR localtime() . ": " , @_ , "\n";
}
sub wall(@)
{
message(@_);
error(@_);
}