package AnonymiseDB;
use Moose;
use FindBin qw($RealBin); # warn $RealBin;
use Data::Random qw(:all);
use Data::Printer;
use Config::Auto;
use DateTime;
has existing_nhs_nos => ( is => 'ro', isa => 'HashRef', required => 1 );
has new_nhs_numbers => (
is => 'ro',
isa => 'ArrayRef',
lazy_build => 1,
);
has all_names => ( is => 'ro', isa => 'HashRef', lazy_build => 1 );
has female_names => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 );
has male_names => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 );
has first_names => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 );
has surnames => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 );
use constant YEAR => DateTime->today->year;
# replaces some $vals entries - doesn't return anything:
sub anonymise_patient {
my $self = shift;
my $vals = shift;
my $substitute = $self->get_substitutes;
my @last_names = @{ $substitute->{last_names } };
my @first_names = @{ $substitute->{first_names } };
my @boys_names = @{ $substitute->{male_names } };
my @girls_names = @{ $substitute->{female_names} };
my $first_name = '';
my $gender = $vals->{gender} || '';
if ($gender eq 'M') {
$first_name = $boys_names[rand @boys_names];
}
elsif ($gender eq 'F') {
$first_name = $girls_names[rand @girls_names];
}
else {
$first_name = $first_names[rand @first_names];
}
# replace these $vals entries:
$vals->{last_name} = lc $last_names[rand @last_names];
$vals->{first_name} = lc $first_name;
if ( $vals->{dob} ) { # Data::Random::rand_date
$vals->{dob} = rand_date(min => '1920-1-1', max => YEAR . '-1-1');
}
if ( $vals->{nhs_number} ) {
$vals->{nhs_number} = shift @{ $self->new_nhs_numbers };
}
if ( $vals->{unit_number} ) {
}
}
sub anonymise_unit_number {
my ($self, $vals) = @_;
my @digits = (0 .. 9); # will not modify 'UNKNOWN'
$vals->{unit_number} =~ s/\d/$digits[rand @digits]/eg;
}
sub get_substitutes {
my $self = shift;
my @last_names = @{ $self->surnames };
my @first_names = @{ $self->first_names };
my @boys_names = @{ $self->male_names };
my @girls_names = @{ $self->female_names };
my %substitutes = (
last_names => \@last_names,
first_names => \@first_names,
male_names => \@boys_names,
female_names => \@girls_names,
);
return \%substitutes;
}
sub _build_all_names {
my $self = shift;
my $names_src = "$RealBin/names.pl";
my $names = Config::Auto::parse($names_src, format => 'perl'); # p $names;
return $names;
}
sub _build_surnames {
my $self = shift;
my $names = $self->all_names;
return $names->{surnames};
}
sub _build_male_names {
my $self = shift;
my $names = $self->all_names;
return $names->{male_names};
}
sub _build_female_names {
my $self = shift;
my $names = $self->all_names;
return $names->{female_names};
}
sub _build_first_names {
my $self = shift;
my @boys_names = @{ $self->male_names };
my @girls_names = @{ $self->female_names };
return [ @boys_names, @girls_names ];
}
# returns list of unique valid 10-digit nhs_number:
sub _build_new_nhs_numbers {
my $self = shift;
my $existing_nhs_nos = $self->existing_nhs_nos; p $existing_nhs_nos;
my $required = keys %$existing_nhs_nos; p $required;
my %new;
while ( scalar keys %new < $required ) { # p scalar keys %new;
my $nhs_number = join '', map { int rand 9 } 1 .. 10; # warn $nhs_number;
# skip if number already exists in patients table (or will fail on insert
# if not yet replaced):
next if $existing_nhs_nos->{$nhs_number};
$new{$nhs_number}++ if $self->_check_nhsno($nhs_number); # is valid
} # p %new;
return [ keys %new ];
}
sub _check_nhsno {
my $self = shift;
my $nhsno = shift; # _dump(Dumper $nhsno);
$nhsno =~ s/\s+//g;
return unless length $nhsno == 10; # require 10 consecutive numbers only
my $product;
# sum of (each of the first nine digits * weighting factor):
for (0 .. 8) { # 1st 9 digits of NHS no
$product += ( substr($nhsno, $_, 1) * (10 - $_) );
}
# Divide $product by 11 and establish the remainder. Subtract the remainder from 11 to give
# the check digit. If the result is 11 then a check digit of 0 is used.
my $remainder = $product % 11 ? 11 - ($product % 11) : '0';
# If the result is 10 then the NHS NUMBER is invalid. Check the remainder matches the check
# digit. If it does not, the NHS NUMBER is invalid.
return $remainder == substr($nhsno, 9, 1) && $remainder != '10'; # true if NHSNo correct
}
1;