RSS Git Download  Clone
Raw Blame History
package AnonymiseDB;

use Moose;

use FindBin qw($RealBin); # warn $RealBin;
use Data::Random qw(:all);
use Data::Printer;
use Config::Auto;
use DateTime;

has existing_nhs_nos => ( is => 'ro', isa => 'HashRef', required => 1 );
has new_nhs_numbers  => (
    is => 'ro',
    isa => 'ArrayRef',
    lazy_build => 1,
);
has all_names    => ( is => 'ro',  isa => 'HashRef',  lazy_build => 1 );
has female_names => ( is  => 'ro', isa => 'ArrayRef', lazy_build => 1 );
has male_names   => ( is  => 'ro', isa => 'ArrayRef', lazy_build => 1 );
has first_names  => ( is  => 'ro', isa => 'ArrayRef', lazy_build => 1 );
has surnames     => ( is  => 'ro', isa => 'ArrayRef', lazy_build => 1 );

use constant YEAR => DateTime->today->year;

# replaces some $vals entries - doesn't return anything:
sub anonymise_patient {
	my $self = shift;
	my $vals = shift;

    my $substitute = $self->get_substitutes;

    my @last_names  = @{ $substitute->{last_names  } };
    my @first_names = @{ $substitute->{first_names } };
    my @boys_names  = @{ $substitute->{male_names  } };
    my @girls_names = @{ $substitute->{female_names} };

	my $first_name = '';
	my $gender = $vals->{gender} || '';

	if ($gender eq 'M') {
		$first_name = $boys_names[rand @boys_names];
	}
	elsif ($gender eq 'F') {
		$first_name = $girls_names[rand @girls_names];
	}
	else {
		$first_name = $first_names[rand @first_names];
	}

	# replace these $vals entries:
	$vals->{last_name} = lc $last_names[rand @last_names];
	$vals->{first_name} = lc $first_name;
    if ( $vals->{dob} ) { # Data::Random::rand_date
        $vals->{dob} = rand_date(min => '1920-1-1', max => YEAR . '-1-1');
    }
    if ( $vals->{nhs_number} ) {
        $vals->{nhs_number} = shift @{ $self->new_nhs_numbers };
    }
    if ( $vals->{unit_number} ) {
    }
}

sub anonymise_unit_number {
 	my ($self, $vals) = @_;
    my @digits = (0 .. 9); # will not modify 'UNKNOWN'
    $vals->{unit_number} =~ s/\d/$digits[rand @digits]/eg;
}

sub get_substitutes {
	my $self = shift;

    my @last_names  = @{ $self->surnames };
    my @first_names = @{ $self->first_names };
    my @boys_names  = @{ $self->male_names };
    my @girls_names = @{ $self->female_names };

    my %substitutes = (
        last_names   => \@last_names,
        first_names  => \@first_names,
        male_names   => \@boys_names,
        female_names => \@girls_names,
    );

    return \%substitutes;
}

sub _build_all_names {
    my $self = shift;

    my $names_src = "$RealBin/names.pl";
    my $names = Config::Auto::parse($names_src, format => 'perl'); # p $names;

    return $names;
}

sub _build_surnames {
    my $self = shift;

    my $names = $self->all_names;
    return $names->{surnames};
}

sub _build_male_names {
    my $self = shift;

    my $names = $self->all_names;
    return $names->{male_names};
}

sub _build_female_names {
    my $self = shift;

    my $names = $self->all_names;
    return $names->{female_names};
}

sub _build_first_names {
    my $self = shift;

    my @boys_names  = @{ $self->male_names };
    my @girls_names = @{ $self->female_names };

    return [ @boys_names, @girls_names ];
}

# returns list of unique valid 10-digit nhs_number:
sub _build_new_nhs_numbers {
    my $self = shift;

    my $existing_nhs_nos = $self->existing_nhs_nos; p $existing_nhs_nos;
    my $required = keys %$existing_nhs_nos;  p $required;

	my %new;

	while ( scalar keys %new < $required ) { # p scalar keys %new;
		my $nhs_number = join '', map { int rand 9 } 1 .. 10; # warn $nhs_number;
        # skip if number already exists in patients table (or will fail on insert
        # if not yet replaced):
        next if $existing_nhs_nos->{$nhs_number};
		$new{$nhs_number}++ if $self->_check_nhsno($nhs_number); # is valid
    } # p %new;
    return [ keys %new ];
}

sub _check_nhsno {
    my $self  = shift;
	my $nhsno = shift; # _dump(Dumper $nhsno);

	$nhsno =~ s/\s+//g;

	return unless length $nhsno == 10; # require 10 consecutive numbers only

	my $product;

	# sum of (each of the first nine digits * weighting factor):
	for (0 .. 8) { # 1st 9 digits of NHS no
		$product += ( substr($nhsno, $_, 1) * (10 - $_) );
	}

	# Divide $product by 11 and establish the remainder. Subtract the remainder from 11 to give
	# the check digit. If the result is 11 then a check digit of 0 is used.
	my $remainder = $product % 11 ? 11 - ($product % 11) : '0';

	# If the result is 10 then the NHS NUMBER is invalid. Check the remainder matches the check
	# digit. If it does not, the NHS NUMBER is invalid.
	return $remainder == substr($nhsno, 9, 1) && $remainder != '10'; # true if NHSNo correct
}

1;