I have 6000 images, most of which I beleive to be duplicates. I wish to sort all the unique images into a new directory using perl. Some unique images have the same file size so they cannot be sorted by size. I wrote this program to do the job, but it only finds two unique images and I know there are more. Where am I going wrong?
#!/usr/bin/perl
use strict;
##init vars
my $doc = '';
my $undoc = '';
my $newdoc = '';
my $newundoc = '';
my $forcounter = 0;
my $skip = 0;
my $dir = 'D:\Earth\digits\\';
opendir (DIR, $dir) or die "cannot opendir $dir";
foreach my $file (readdir(DIR)) {
unless ($file eq '.' or $file eq '..') {
$forcounter = 0;
$skip = 0;
&process_file ($file);
}
}
closedir (DIR);
sub process_file() {
$_ = shift;
open (IMAGE, "$dir$_"
or die "Cannot open file $dir$_: $!";
binmode IMAGE;
read( IMAGE, $doc, 10000 );
close IMAGE;
my $uniquedir = 'D:\Earth\unique\\';
opendir (DIR, $uniquedir) or die "cannot opendir $uniquedir";
foreach my $unfile (readdir(DIR)) {
unless ($unfile eq '.' or $unfile eq '..') {
$forcounter++;
open (IMAGE, "$uniquedir$unfile"
or die "Cannot open file $dir$_: $!";
binmode IMAGE;
read( IMAGE, $undoc, 10000 );
close IMAGE;
my @chars = split (//,$doc);
my @unchars = split (//,$undoc);
while (@chars) {
if (shift(@chars) == shift(@unchars)) {
}
else {
$skip++;
last;
}
if ($skip) {
last;
}
}
}
}
if ($skip == $forcounter) {
open(IMAGE, ">D:\\Earth\\unique\\$_"
|| die"$_.jpg: $!";
binmode IMAGE;
print IMAGE $doc;
close IMAGE;
print "$dir$_ is unique and is saved away\n";
}
else {
print "$dir$_ is NOT unique\n";
}
closedir (DIR);
}
Thanks, Celia
#!/usr/bin/perl
use strict;
##init vars
my $doc = '';
my $undoc = '';
my $newdoc = '';
my $newundoc = '';
my $forcounter = 0;
my $skip = 0;
my $dir = 'D:\Earth\digits\\';
opendir (DIR, $dir) or die "cannot opendir $dir";
foreach my $file (readdir(DIR)) {
unless ($file eq '.' or $file eq '..') {
$forcounter = 0;
$skip = 0;
&process_file ($file);
}
}
closedir (DIR);
sub process_file() {
$_ = shift;
open (IMAGE, "$dir$_"
binmode IMAGE;
read( IMAGE, $doc, 10000 );
close IMAGE;
my $uniquedir = 'D:\Earth\unique\\';
opendir (DIR, $uniquedir) or die "cannot opendir $uniquedir";
foreach my $unfile (readdir(DIR)) {
unless ($unfile eq '.' or $unfile eq '..') {
$forcounter++;
open (IMAGE, "$uniquedir$unfile"
binmode IMAGE;
read( IMAGE, $undoc, 10000 );
close IMAGE;
my @chars = split (//,$doc);
my @unchars = split (//,$undoc);
while (@chars) {
if (shift(@chars) == shift(@unchars)) {
}
else {
$skip++;
last;
}
if ($skip) {
last;
}
}
}
}
if ($skip == $forcounter) {
open(IMAGE, ">D:\\Earth\\unique\\$_"
binmode IMAGE;
print IMAGE $doc;
close IMAGE;
print "$dir$_ is unique and is saved away\n";
}
else {
print "$dir$_ is NOT unique\n";
}
closedir (DIR);
}
Thanks, Celia