RSS Git Download  Clone
Raw Blame History
#!/usr/bin/perl -w

BEGIN {
    use FindBin qw($RealBin); # warn $FindBin::RealBin; exit;

	use lib (
        "$RealBin",
        "$RealBin/../../lib",
		'/home/raj/perl5/lib/perl5',
	);
}

use strict;
use warnings;

use DB;
use Config::Tiny;
use Data::Dumper;
use Spreadsheet::WriteExcel::Simple;

my $src = "$RealBin/.db";
my $cfg = Config::Tiny->read($src); # warn Dumper $cfg; exit;

my $DB = DB->new(config => $cfg);

# hashref of DBI & DBIx::Simple db handles for source(hilis3) & destination (hilis4) dbs:
my $db = $DB->connect;

my $out_file = 'duplicate_nhs_numbers_hilis3.xls';

my $xl = Spreadsheet::WriteExcel::Simple->new;

$xl->write_bold_row( [ qw(p_id last_name first_name dob patient_no nhs_number) ] );

my $not_unique = 'SELECT NHSNo FROM PID GROUP BY NHSNo HAVING COUNT(NHSNo) > 1 ORDER BY NHSNo';
my $pid = 'SELECT P_ID, LName, FName, DoB, PatNo, NHSNo FROM PID WHERE NHSNo = ?';

my $result = $db->{dbix3}->query($not_unique);

my %nhs_numbers;

{
	no warnings 'uninitialized'; # eg null dob
	while ( my $nhs_no = $result->list ) { # print $nhs_no, "\n";
		for my $row ( $db->{dbix3}->query($pid, $nhs_no)->arrays ) {
			# create hash of nhs_number => patient data:
			my $patient = join '~', @{$row}[1..2]; # warn Dumper $patient;
			$nhs_numbers{$nhs_no}{$patient}++;
			
			# $xl->write_row($row);
		}
	} # warn Dumper \%nhs_numbers;
}

open my $log, '>', './duplicate_nhs_numbers.txt' or die $!;
# print $log Dumper \%nhs_numbers; exit;

# foreach nhs_number, need to know which has > 1 data set:
while ( my ($nhs_number, $data) = each %nhs_numbers ) { # warn scalar keys %$data;
	next unless scalar keys %$data > 1;
	print $log join ' ', $nhs_number, Dumper $nhs_numbers{$nhs_number}; 
}

$xl->save( './' . $out_file );

__END__
# use for hilis4 tables:
DROP TABLE IF EXISTS duplicate_nhs_numbers;

CREATE TEMPORARY TABLE duplicate_nhs_numbers ( nhs_number char(10) );

INSERT INTO duplicate_nhs_numbers (nhs_number)
   SELECT nhs_number FROM patients GROUP BY nhs_number HAVING COUNT(nhs_number) > 1;

SELECT p.last_name, p.first_name, p.dob, p.nhs_number
FROM patients p JOIN duplicate_nhs_numbers d on d.nhs_number = p.nhs_number
ORDER BY p.last_name, p.first_name