C
Clint O
The following program I wrote I'm using to find duplicate files. The problem is that I have files with whitespace or potentially other special characters:
#!/opt/local/bin/perl
use Digest::MD5;
use File::Find;
use Data:
umper;
use strict;
use warnings;
my %results = ();
sub do_file;
my @files = @ARGV;
exit 1 if !@files;
find(sub { do_file(\%results) }, @files );
for (keys %results) {
my @f = @{$results{$_}};
if (scalar @f > 1) {
print "$f[0] => $f[1]\n";
}
}
sub do_file {
my ($hash) = @_;
return if -d $_;
open(my $fh, $_) or die "Can't open '$File::Find::name': $!";
binmode $fh;
my $digest;
$digest = Digest::MD5->new->addfile($fh)->hexdigest;
close $fh;
push @{$hash->{$digest}}, $File::Find::name;
}
0;
If I create a test directory:
$ mkdir test_dir
$ cd test_dir
$ touch " my file"
$ ./dupcheck testdir
Can't open 'testdir/ my file': No such file or directory at ./dupcheck line 32.
I can't be the first one who has run into this problem, and I'm sure there's a reasonable explanation for how to cope with this, but I haven't been able to find anything via the searching etc. on the web.
Thanks,
-Clint
#!/opt/local/bin/perl
use Digest::MD5;
use File::Find;
use Data:
use strict;
use warnings;
my %results = ();
sub do_file;
my @files = @ARGV;
exit 1 if !@files;
find(sub { do_file(\%results) }, @files );
for (keys %results) {
my @f = @{$results{$_}};
if (scalar @f > 1) {
print "$f[0] => $f[1]\n";
}
}
sub do_file {
my ($hash) = @_;
return if -d $_;
open(my $fh, $_) or die "Can't open '$File::Find::name': $!";
binmode $fh;
my $digest;
$digest = Digest::MD5->new->addfile($fh)->hexdigest;
close $fh;
push @{$hash->{$digest}}, $File::Find::name;
}
0;
If I create a test directory:
$ mkdir test_dir
$ cd test_dir
$ touch " my file"
$ ./dupcheck testdir
Can't open 'testdir/ my file': No such file or directory at ./dupcheck line 32.
I can't be the first one who has run into this problem, and I'm sure there's a reasonable explanation for how to cope with this, but I haven't been able to find anything via the searching etc. on the web.
Thanks,
-Clint