B
bettyann
hi all,
can anyone help me limit the greediness of my substitution pattern? i
have a CSV file and i want to insert a new column of values after the
6th column. but the new data to be inserted is dependent upon the
value of the 6th column.
example original data:
2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1
1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1
5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1
8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1
i want to put "0" after the 6th column if the 6th column contains
"hold.bmp".
i want to put "-1" after the 6th column if the 6th column contains
"NaN".
i thought i could do this with two substitutions commands:
s/^((.*?,){5}?(hold.bmp))/$1,0/
s/^((.*?,){5}?(NaN))/$1,-1/
i cannot limit the matching of "hold.bmp" or "NaN". i want this
pattern to match *only* if "hold.bmp" or "NaN" immediately follows the
5th column.
my test code:
#!/usr/local/bin/perl
use strict;
use warnings;
my $input = <<EOF;
2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1
1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1
5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1
8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1
EOF
my @oData = split( '\n', $input );
my $line;
my $cnt = 0;
foreach $line ( @oData ) {
printf( "$cnt) $line \n" );
$cnt++;
}
my $prevCol = 5;
my @txtList = ( "hold.bmp", "NaN" );
my @valList = ( "0", "-1" );
my ( $txt, $cmd, $i );
$i = 0;
foreach $txt ( @txtList ) {
$cmd = sprintf( '$line =~ s/^((.*?,){%d}?(%s))/$1,%s/;',
$prevCol, $txt, $valList[$i] );
printf( "\ncmd >>$cmd<< \n" );
foreach $line ( @oData ) {
printf( "orig line |$line| \n" );
eval $cmd;
printf( " new line |$line| \n---------------------\n" );
}
$i++;
}
exit;
output:
% test2.pl
0) 2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1
1) 1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1
2) 5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1
3) 8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1
cmd >>$line =~ s/^((.*?,){5}?(hold.bmp))/$1,0/;<<
orig line |2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1|
new line |2,NaN,NaN,NaN,64,hold.bmp,0,1607444,NaN,NaN,NaN,hold.bmp,NaN,1|
---------------------
orig line |1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1|
new line |1,NaN,NaN,NaN,32,hold.bmp,0,1607488,NaN,NaN,NaN,hold.bmp,3,1|
---------------------
orig line |5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1|
new line |5,NaN,NaN,4,32,hold.bmp,0,1607503,NaN,NaN,8,go.bmp,NaN,1|
---------------------
orig line |8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1|
new line |8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,0,NaN,1|
---------------------
cmd >>$line =~ s/^((.*?,){5}?(NaN))/$1,-1/;<<
orig line |2,NaN,NaN,NaN,64,hold.bmp,0,1607444,NaN,NaN,NaN,hold.bmp,NaN,1|
new line |2,NaN,NaN,NaN,64,hold.bmp,0,1607444,NaN,-1,NaN,NaN,hold.bmp,NaN,1|
can anyone help me limit the greediness of my substitution pattern? i
have a CSV file and i want to insert a new column of values after the
6th column. but the new data to be inserted is dependent upon the
value of the 6th column.
example original data:
2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1
1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1
5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1
8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1
i want to put "0" after the 6th column if the 6th column contains
"hold.bmp".
i want to put "-1" after the 6th column if the 6th column contains
"NaN".
i thought i could do this with two substitutions commands:
s/^((.*?,){5}?(hold.bmp))/$1,0/
s/^((.*?,){5}?(NaN))/$1,-1/
i cannot limit the matching of "hold.bmp" or "NaN". i want this
pattern to match *only* if "hold.bmp" or "NaN" immediately follows the
5th column.
my test code:
#!/usr/local/bin/perl
use strict;
use warnings;
my $input = <<EOF;
2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1
1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1
5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1
8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1
EOF
my @oData = split( '\n', $input );
my $line;
my $cnt = 0;
foreach $line ( @oData ) {
printf( "$cnt) $line \n" );
$cnt++;
}
my $prevCol = 5;
my @txtList = ( "hold.bmp", "NaN" );
my @valList = ( "0", "-1" );
my ( $txt, $cmd, $i );
$i = 0;
foreach $txt ( @txtList ) {
$cmd = sprintf( '$line =~ s/^((.*?,){%d}?(%s))/$1,%s/;',
$prevCol, $txt, $valList[$i] );
printf( "\ncmd >>$cmd<< \n" );
foreach $line ( @oData ) {
printf( "orig line |$line| \n" );
eval $cmd;
printf( " new line |$line| \n---------------------\n" );
}
$i++;
}
exit;
output:
% test2.pl
0) 2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1
1) 1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1
2) 5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1
3) 8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1
cmd >>$line =~ s/^((.*?,){5}?(hold.bmp))/$1,0/;<<
orig line |2,NaN,NaN,NaN,64,hold.bmp,1607444,NaN,NaN,NaN,hold.bmp,NaN,1|
new line |2,NaN,NaN,NaN,64,hold.bmp,0,1607444,NaN,NaN,NaN,hold.bmp,NaN,1|
---------------------
orig line |1,NaN,NaN,NaN,32,hold.bmp,1607488,NaN,NaN,NaN,hold.bmp,3,1|
new line |1,NaN,NaN,NaN,32,hold.bmp,0,1607488,NaN,NaN,NaN,hold.bmp,3,1|
---------------------
orig line |5,NaN,NaN,4,32,hold.bmp,1607503,NaN,NaN,8,go.bmp,NaN,1|
new line |5,NaN,NaN,4,32,hold.bmp,0,1607503,NaN,NaN,8,go.bmp,NaN,1|
---------------------
orig line |8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,NaN,1|
new line |8,NaN,NaN,4,32,NaN,1607564,NaN,NaN,8,hold.bmp,0,NaN,1|
---------------------
cmd >>$line =~ s/^((.*?,){5}?(NaN))/$1,-1/;<<
orig line |2,NaN,NaN,NaN,64,hold.bmp,0,1607444,NaN,NaN,NaN,hold.bmp,NaN,1|
new line |2,NaN,NaN,NaN,64,hold.bmp,0,1607444,NaN,-1,NaN,NaN,hold.bmp,NaN,1|