#!/usr/bin/perl -w
BEGIN {
use FindBin qw($RealBin); # warn $FindBin::RealBin; exit;
use lib (
"$RealBin",
"$RealBin/../../lib",
'/home/raj/perl5/lib/perl5',
);
}
use strict;
use warnings;
use DB;
use Config::Tiny;
use Data::Dumper;
use Spreadsheet::WriteExcel::Simple;
my $src = "$RealBin/.db";
my $cfg = Config::Tiny->read($src); # warn Dumper $cfg; exit;
my $DB = DB->new(config => $cfg);
# hashref of DBI & DBIx::Simple db handles for source(hilis3) & destination (hilis4) dbs:
my $db = $DB->connect;
my $out_file = 'duplicate_nhs_numbers_hilis3.xls';
my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row( [ qw(p_id last_name first_name dob patient_no nhs_number) ] );
my $not_unique = 'SELECT NHSNo FROM PID GROUP BY NHSNo HAVING COUNT(NHSNo) > 1 ORDER BY NHSNo';
my $pid = 'SELECT P_ID, LName, FName, DoB, PatNo, NHSNo FROM PID WHERE NHSNo = ?';
my $result = $db->{dbix3}->query($not_unique);
my %nhs_numbers;
{
no warnings 'uninitialized'; # eg null dob
while ( my $nhs_no = $result->list ) { # print $nhs_no, "\n";
for my $row ( $db->{dbix3}->query($pid, $nhs_no)->arrays ) {
# create hash of nhs_number => patient data:
my $patient = join '~', @{$row}[1..3]; # warn Dumper $patient;
$nhs_numbers{$nhs_no}{$patient}++;
$xl->write_row($row);
}
} # warn Dumper \%nhs_numbers;
}
open my $log, '>', './duplicate_nhs_numbers.txt' or die $!;
# print $log Dumper \%nhs_numbers; exit;
# foreach nhs_number, need to know which has > 1 data set:
while ( my ($nhs_number, $data) = each %nhs_numbers ) { # warn scalar keys %$data;
next unless scalar keys %$data > 1;
print $log join ' ', $nhs_number, Dumper $nhs_numbers{$nhs_number};
}
$xl->save( './' . $out_file );
__END__
# use for hilis4 tables:
DROP TABLE IF EXISTS duplicate_nhs_numbers;
CREATE TEMPORARY TABLE duplicate_nhs_numbers ( nhs_number char(10) );
INSERT INTO duplicate_nhs_numbers (nhs_number)
SELECT nhs_number FROM patients GROUP BY nhs_number HAVING COUNT(nhs_number) > 1;
SELECT p.last_name, p.first_name, p.dob, p.nhs_number
FROM patients p JOIN duplicate_nhs_numbers d on d.nhs_number = p.nhs_number
ORDER BY p.last_name, p.first_name