M
mcvallet
Hi,
I am coding a program that parses a file 370Mb. As long as I keep this
number less than a 1000 in this portion :
# basicly tells me until when i should continue to read the file)
if ($ligne =~ m/^.*1000>>>(\w+).*/){
$stop= 1;
}
it works, but as soon as I increase the number (the max number being
2225) so I am not even reading 1/2 of it, the program does not respond.
Does anybody have a suggestion for this ?
thank you,
##############################################################################"
$#complete = 4000000;
open(OUTPUTFILE, $outPut)
|| die "cannot open file";
#variable initialisation
my $countTotPositive = 0;
my $countTotNegative = 0;
my $stop= 0;
my $countTotProt = 0;
my @start = times();
while(($ligne = <OUTPUTFILE> ) && $stop == 0){
#identifying the protein being compared
if ($ligne =~ m/^.+(\d*)+>>>\s*(\w+).*/){
#the next commented lignes are here for test purposes
if ($ligne =~ m/^.*1200>>>(\w+).*/){
$stop= 1;
}
$protName1 = $2;
$protName1 =~ s/_//g;
$count = 0;
}
#parsing the results
else{
$_=$ligne ;
my $evalue= 0;
/^\s?(\w+).*\s+\(\s*(\d+)\)\W+(\d+)\W+(\d*)\.?(\d*)\W+(\d*)\.?(\d*)e?\+?(\d{1,2})$/so;
my $protName2=$1;
my $nbAa=$2;
my $eval3=$3;
my $eval4=$4;
my $eval5=$5;
$eval[0]="$6";
$eval[1]=$7;
my $eval8=$8;
$protName2 =~ s/_//g;
#finding out what is the evalue for this result
if ($ligne =~ m/e\+(\d{2,2})$/so){
$evalue = $eval[0].".".@eval;
for ($i = 0; $i < $eval8; $i++){
$evalue = $evalue * 10;
}
}else{
if ($eval[0] =~ m/^0/){
$evalue = $eval[0].".".$eval[1].$eval8;
}else{
$evalue = $eval[0].$eval[1].$eval8;
}
}
@sortedCouple = sort($protName1,$protName2);
if ($complete{"$sortedCouple[0]-$sortedCouple[1]"}[0]
|| $sortedCouple[0] =~ m/$sortedCouple[1]/i){
$evalue2 = $evalue;
#modifying the evalue 1 if the identical couple
if($sortedCouple[0] =~ m/$sortedCouple[1]/i){
$evalue1 = $evalue;
$identical =1;
$countTotPositive++;
}else{
$evalue1 = $complete{"$sortedCouple[0]-$sortedCouple[1]"}[0];
$identical =$complete{"$sortedCouple[0]-$sortedCouple[1]"}[1];
}
$complete{"$sortedCouple[0]-$sortedCouple[1]"} = [$protName1,
$protName2, $evalue1 + $evalue2, $identical, $evalue1, $evalue2];
$count++;
}
# temporaly saving the partial results
else{
$class1 = $classes{$protName1};
$class2 = $classes{$protName2};
$identical = ( $class1=~ m/$class2/ ? 1 : 0);
if ($identical == 1){
$countTotPositive++;
}else{
$countTotNegative++;
}
$complete{"$sortedCouple[0]-$sortedCouple[1]"} = [$evalue,
$identical];
}
}
}
close OUTPUTFILE;
#variable initialisation
$countPositive = 0;
$countNegative = 0;
foreach $complete (sort{$complete{$a}[2]<=> $complete{$b}[2]} keys
%complete) {
if ($complete{$complete}[3] == 1){
$countPositive++;
}else{
$countNegative++;
}
$newLigne =
$complete{$complete}[0]."\t".$complete{$complete}[1]."\t".$complete{$complete}[2]."\t".$complete{$complete}[3]."\t".$countPositive/$countTotPositive."\t".$countNegative/$countTotNegative."\t".$complete{$complete}[4]."\t".$complete{$complete}[5]."\n";
push @results,$newLigne;
}
@end = times();
# ============= Analyse results
print "Reading and parsing file took ",$end[0]-$start[0]," cpu
seconds\n";
# creation du document
print "\n";
@start = times();
open (F,">results/5out.test");
print F "@results";
close F;
@end = times();
# ============= Analyse results
print "Writting the file results/5out.test",$end[0]-$start[0]," cpu
seconds\n";
}
##############################################################################""
I am coding a program that parses a file 370Mb. As long as I keep this
number less than a 1000 in this portion :
# basicly tells me until when i should continue to read the file)
if ($ligne =~ m/^.*1000>>>(\w+).*/){
$stop= 1;
}
it works, but as soon as I increase the number (the max number being
2225) so I am not even reading 1/2 of it, the program does not respond.
Does anybody have a suggestion for this ?
thank you,
##############################################################################"
$#complete = 4000000;
open(OUTPUTFILE, $outPut)
|| die "cannot open file";
#variable initialisation
my $countTotPositive = 0;
my $countTotNegative = 0;
my $stop= 0;
my $countTotProt = 0;
my @start = times();
while(($ligne = <OUTPUTFILE> ) && $stop == 0){
#identifying the protein being compared
if ($ligne =~ m/^.+(\d*)+>>>\s*(\w+).*/){
#the next commented lignes are here for test purposes
if ($ligne =~ m/^.*1200>>>(\w+).*/){
$stop= 1;
}
$protName1 = $2;
$protName1 =~ s/_//g;
$count = 0;
}
#parsing the results
else{
$_=$ligne ;
my $evalue= 0;
/^\s?(\w+).*\s+\(\s*(\d+)\)\W+(\d+)\W+(\d*)\.?(\d*)\W+(\d*)\.?(\d*)e?\+?(\d{1,2})$/so;
my $protName2=$1;
my $nbAa=$2;
my $eval3=$3;
my $eval4=$4;
my $eval5=$5;
$eval[0]="$6";
$eval[1]=$7;
my $eval8=$8;
$protName2 =~ s/_//g;
#finding out what is the evalue for this result
if ($ligne =~ m/e\+(\d{2,2})$/so){
$evalue = $eval[0].".".@eval;
for ($i = 0; $i < $eval8; $i++){
$evalue = $evalue * 10;
}
}else{
if ($eval[0] =~ m/^0/){
$evalue = $eval[0].".".$eval[1].$eval8;
}else{
$evalue = $eval[0].$eval[1].$eval8;
}
}
@sortedCouple = sort($protName1,$protName2);
if ($complete{"$sortedCouple[0]-$sortedCouple[1]"}[0]
|| $sortedCouple[0] =~ m/$sortedCouple[1]/i){
$evalue2 = $evalue;
#modifying the evalue 1 if the identical couple
if($sortedCouple[0] =~ m/$sortedCouple[1]/i){
$evalue1 = $evalue;
$identical =1;
$countTotPositive++;
}else{
$evalue1 = $complete{"$sortedCouple[0]-$sortedCouple[1]"}[0];
$identical =$complete{"$sortedCouple[0]-$sortedCouple[1]"}[1];
}
$complete{"$sortedCouple[0]-$sortedCouple[1]"} = [$protName1,
$protName2, $evalue1 + $evalue2, $identical, $evalue1, $evalue2];
$count++;
}
# temporaly saving the partial results
else{
$class1 = $classes{$protName1};
$class2 = $classes{$protName2};
$identical = ( $class1=~ m/$class2/ ? 1 : 0);
if ($identical == 1){
$countTotPositive++;
}else{
$countTotNegative++;
}
$complete{"$sortedCouple[0]-$sortedCouple[1]"} = [$evalue,
$identical];
}
}
}
close OUTPUTFILE;
#variable initialisation
$countPositive = 0;
$countNegative = 0;
foreach $complete (sort{$complete{$a}[2]<=> $complete{$b}[2]} keys
%complete) {
if ($complete{$complete}[3] == 1){
$countPositive++;
}else{
$countNegative++;
}
$newLigne =
$complete{$complete}[0]."\t".$complete{$complete}[1]."\t".$complete{$complete}[2]."\t".$complete{$complete}[3]."\t".$countPositive/$countTotPositive."\t".$countNegative/$countTotNegative."\t".$complete{$complete}[4]."\t".$complete{$complete}[5]."\n";
push @results,$newLigne;
}
@end = times();
# ============= Analyse results
print "Reading and parsing file took ",$end[0]-$start[0]," cpu
seconds\n";
# creation du document
print "\n";
@start = times();
open (F,">results/5out.test");
print F "@results";
close F;
@end = times();
# ============= Analyse results
print "Writting the file results/5out.test",$end[0]-$start[0]," cpu
seconds\n";
}
##############################################################################""