#!/usr/bin/perl -w BEGIN { use FindBin qw($RealBin); # warn $FindBin::RealBin; exit; use lib ( "$RealBin", "$RealBin/../../lib", '/home/raj/perl5/lib/perl5', ); } use strict; use warnings; use DB; use Config::Tiny; use Data::Dumper; use Spreadsheet::WriteExcel::Simple; my $src = "$RealBin/.db"; my $cfg = Config::Tiny->read($src); # warn Dumper $cfg; exit; my $DB = DB->new(config => $cfg); # hashref of DBI & DBIx::Simple db handles for source(hilis3) & destination (hilis4) dbs: my $db = $DB->connect; my $out_file = 'duplicate_nhs_numbers_hilis3.xls'; my $xl = Spreadsheet::WriteExcel::Simple->new; $xl->write_bold_row( [ qw(p_id last_name first_name dob patient_no nhs_number) ] ); my $not_unique = 'SELECT NHSNo FROM PID GROUP BY NHSNo HAVING COUNT(NHSNo) > 1 ORDER BY NHSNo'; my $pid = 'SELECT P_ID, LName, FName, DoB, PatNo, NHSNo FROM PID WHERE NHSNo = ?'; my $result = $db->{dbix3}->query($not_unique); my %nhs_numbers; { no warnings 'uninitialized'; # eg null dob while ( my $nhs_no = $result->list ) { # print $nhs_no, "\n"; for my $row ( $db->{dbix3}->query($pid, $nhs_no)->arrays ) { # create hash of nhs_number => patient data: my $patient = join '~', @{$row}[1..3]; # warn Dumper $patient; $nhs_numbers{$nhs_no}{$patient}++; $xl->write_row($row); } } # warn Dumper \%nhs_numbers; } open my $log, '>', './duplicate_nhs_numbers.txt' or die $!; # print $log Dumper \%nhs_numbers; exit; # foreach nhs_number, need to know which has > 1 data set: while ( my ($nhs_number, $data) = each %nhs_numbers ) { # warn scalar keys %$data; next unless scalar keys %$data > 1; print $log join ' ', $nhs_number, Dumper $nhs_numbers{$nhs_number}; } $xl->save( './' . $out_file ); __END__ # use for hilis4 tables: DROP TABLE IF EXISTS duplicate_nhs_numbers; CREATE TEMPORARY TABLE duplicate_nhs_numbers ( nhs_number char(10) ); INSERT INTO duplicate_nhs_numbers (nhs_number) SELECT nhs_number FROM patients GROUP BY nhs_number HAVING COUNT(nhs_number) > 1; SELECT p.last_name, p.first_name, p.dob, p.nhs_number FROM patients p JOIN duplicate_nhs_numbers d on d.nhs_number = p.nhs_number ORDER BY p.last_name, p.first_name