package AnonymiseDB; use Moose; use FindBin qw($RealBin); # warn $RealBin; use Data::Random qw(:all); use Data::Printer; use Config::Auto; use DateTime; has existing_nhs_nos => ( is => 'ro', isa => 'HashRef', required => 1 ); has new_nhs_numbers => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1, ); has all_names => ( is => 'ro', isa => 'HashRef', lazy_build => 1 ); has female_names => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 ); has male_names => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 ); has first_names => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 ); has surnames => ( is => 'ro', isa => 'ArrayRef', lazy_build => 1 ); use constant YEAR => DateTime->today->year; # replaces some $vals entries - doesn't return anything: sub anonymise_patient { my $self = shift; my $vals = shift; my $substitute = $self->get_substitutes; my @last_names = @{ $substitute->{last_names } }; my @first_names = @{ $substitute->{first_names } }; my @boys_names = @{ $substitute->{male_names } }; my @girls_names = @{ $substitute->{female_names} }; my $first_name = ''; my $gender = $vals->{gender} || ''; if ($gender eq 'M') { $first_name = $boys_names[rand @boys_names]; } elsif ($gender eq 'F') { $first_name = $girls_names[rand @girls_names]; } else { $first_name = $first_names[rand @first_names]; } # replace these $vals entries: $vals->{last_name} = lc $last_names[rand @last_names]; $vals->{first_name} = lc $first_name; if ( $vals->{dob} ) { # Data::Random::rand_date $vals->{dob} = rand_date(min => '1920-1-1', max => YEAR . '-1-1'); } if ( $vals->{nhs_number} ) { $vals->{nhs_number} = shift @{ $self->new_nhs_numbers }; } if ( $vals->{unit_number} ) { } } sub anonymise_unit_number { my ($self, $vals) = @_; my @digits = (0 .. 9); # will not modify 'UNKNOWN' $vals->{unit_number} =~ s/\d/$digits[rand @digits]/eg; } sub get_substitutes { my $self = shift; my @last_names = @{ $self->surnames }; my @first_names = @{ $self->first_names }; my @boys_names = @{ $self->male_names }; my @girls_names = @{ $self->female_names }; my %substitutes = ( last_names => \@last_names, first_names => \@first_names, male_names => \@boys_names, female_names => \@girls_names, ); return \%substitutes; } sub _build_all_names { my $self = shift; my $names_src = "$RealBin/names.pl"; my $names = Config::Auto::parse($names_src, format => 'perl'); # p $names; return $names; } sub _build_surnames { my $self = shift; my $names = $self->all_names; return $names->{surnames}; } sub _build_male_names { my $self = shift; my $names = $self->all_names; return $names->{male_names}; } sub _build_female_names { my $self = shift; my $names = $self->all_names; return $names->{female_names}; } sub _build_first_names { my $self = shift; my @boys_names = @{ $self->male_names }; my @girls_names = @{ $self->female_names }; return [ @boys_names, @girls_names ]; } # returns list of unique valid 10-digit nhs_number: sub _build_new_nhs_numbers { my $self = shift; my $existing_nhs_nos = $self->existing_nhs_nos; p $existing_nhs_nos; my $required = keys %$existing_nhs_nos; p $required; my %new; while ( scalar keys %new < $required ) { # p scalar keys %new; my $nhs_number = join '', map { int rand 9 } 1 .. 10; # warn $nhs_number; # skip if number already exists in patients table (or will fail on insert # if not yet replaced): next if $existing_nhs_nos->{$nhs_number}; $new{$nhs_number}++ if $self->_check_nhsno($nhs_number); # is valid } # p %new; return [ keys %new ]; } sub _check_nhsno { my $self = shift; my $nhsno = shift; # _dump(Dumper $nhsno); $nhsno =~ s/\s+//g; return unless length $nhsno == 10; # require 10 consecutive numbers only my $product; # sum of (each of the first nine digits * weighting factor): for (0 .. 8) { # 1st 9 digits of NHS no $product += ( substr($nhsno, $_, 1) * (10 - $_) ); } # Divide $product by 11 and establish the remainder. Subtract the remainder from 11 to give # the check digit. If the result is 11 then a check digit of 0 is used. my $remainder = $product % 11 ? 11 - ($product % 11) : '0'; # If the result is 10 then the NHS NUMBER is invalid. Check the remainder matches the check # digit. If it does not, the NHS NUMBER is invalid. return $remainder == substr($nhsno, 9, 1) && $remainder != '10'; # true if NHSNo correct } 1;