package YHHN;
use Moose;
with qw(
Role::User
Role::RebuildTables
Role::ReferralSource
);
use namespace::clean -except => 'meta';
use Data::Dumper;
has $_ => (is => 'ro', isa => 'HashRef', required => 1)
foreach qw( db sql );
has log_file => ( is => 'ro', required => 1 );
has $_ => ( is => 'rw', isa => 'HashRef[Str]', default => sub { {} })
foreach qw(nhs_numbers_map practice_ids orphaned_patient_ids);
has $_ => ( is => 'ro', isa => 'HashRef', lazy_build => 1 )
foreach qw(
tx_detail_id_map
location_id_map
patient_ids_map
tx_type_id_map
antecedent_map
site_id_map
);
__PACKAGE__->meta->make_immutable;
$|++;
# rebuilds:
sub convert {
my $self = shift;
# $self->do_demographics_and_chronologies(); # *** must be run once to populate _patient_ids tbl
# $self->do_treatments();
# $self->do_antecedent();
# $self->do_comment();
# $self->do_params();
$self->do_staging();
# $self->do_mdt();
# $self->do_history();
warn Dumper $self->orphaned_patient_ids;
}
sub do_demographics_and_chronologies {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
my $log_file = $self->log_file;
for ( qw/hmrn._patient_id hmrn.patient_chronologies patient_demographics/ ) {
$self->clear_table($_);
}
# get organisation_codes like '[^BCY]____' from referral_sources:
my $practices_map = $self->referral_source_practices;
# get distinct yhhn.gp_id's (ie org_code):
my $yhhn_practices = $dbix3->query(
q!select distinct(gp_id) from yhhn.demographics
where gp_id is not null and gp_id <> 'B00000'!
)->flat;
# ensure all practices in yhhn.demographics exist in referral_sources:
$self->add_new_practices_to_referral_sources($yhhn_practices); # return;
# get referral_source.id for unknown practice:
$dbix4->query(q!select id from referral_sources where organisation_code = 'V81999'!)
->into( my $unknown_practice_id); # warn $unknown_practice_id;
my %null_nhsno;
# get demographics table data:
my $query = $dbix3->query( q!select * from yhhn.demographics! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_patient_id = $vals->{d_pid};
my $nhs_number = $dbix3->query(
'select NHSNo from PID where P_ID = ?', $hilis3_patient_id
)->list;
unless ($nhs_number) {
$null_nhsno{$hilis3_patient_id}++; warn "no NHSNo for $vals->{d_pid}";
next DATA; # warn $nhs_number;
}
# skip if already seen nhs number:
next DATA if $self->nhs_numbers_map->{$nhs_number}++;
my $hilis4_patient_id = $dbix4->query(
'select id from patients where nhs_number = ?', $nhs_number
)->list;
unless ($hilis4_patient_id) {
warn "no hilis4_patient_id for $nhs_number";
next DATA; # warn $nhs_number;
}
# add to patient_id => nhs_number_map:
$dbix4->insert('hmrn._patient_id',
{ v3 => $hilis3_patient_id, v4 => $hilis4_patient_id }
);
# $self->patient_ids_map->{$hilis3_patient_id} = $hilis4_patient_id;
my $gp_code = $vals->{gp_id} || '';
$gp_code =~ s/\s//g; # trim blank spaces!!
my $status = $vals->{dod} ? 'dead' : 'alive';
my %data = (
patient_id => $hilis4_patient_id,
status => $status,
time => $vals->{timestamp},
);
# add address & post_code if exists:
map {
$data{$_} = $vals->{$_}
} grep $vals->{$_}, qw(address post_code dod);
# add practice_id if exists (document if not):
if ( my $practice_id = $practices_map->{$gp_code} ) {
$data{practice_id} = $practice_id;
}
else {
$data{practice_id} = $unknown_practice_id;
$self->practice_ids->{$gp_code}++; # next DATA unless $practice_id;
}
$dbix4->insert('patient_demographics', \%data);
{ # patient dates:
my %data = (
first_appointment => $vals->{first_app_date},
palliative_care => $vals->{palliative_date},
diagnosed => $vals->{diagnosed},
deceased => $vals->{dod},
patient_id => $hilis4_patient_id,
);
$dbix4->insert('hmrn.patient_chronologies', \%data);
}
}
warn Dumper $self->practice_ids;
while ( my ($nhs_number, $freq) = each %{ $self->nhs_numbers_map } ) {
next unless $freq > 1;
warn $nhs_number, "\n";
}
print $log_file $_, "\n" foreach sort keys %null_nhsno; # warn Dumper \%null_nhsno;
# warn Dumper $self->patient_id_nhs_number_map;
}
sub do_history {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.history');
my $query = $dbix3->query( q!select * from yhhn.history! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = int $vals->{h_pid}; # unsigned zerofill !!
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
# get user.id from History.UserID:
my $user_id = $self->_get_user_id(lc $vals->{userid})
|| warn "no user_id for $vals->{userid}";
my %data = (
patient_id => $hilis4_patient_id,
action => $vals->{action},
user_id => $user_id,
time => $vals->{datetime},
);
$dbix4->insert('hmrn.history', \%data);
}
}
sub do_antecedent {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
for ( qw/patient_antecedent patient_prior_therapies/ ) {
$self->clear_table('hmrn.'.$_);
}
my $map = $self->antecedent_map;
my $query = $dbix3->query( q!select * from yhhn.antecedent! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{an_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $hilis3_pat_id";
next DATA;
}
my $event = $vals->{event};
my $event_id = $map->{$event} || warn "no event_id for $event";
my %data = (
patient_id => $hilis4_patient_id,
event_id => $event_id,
timestamp => $vals->{timestamp},
);
$dbix4->insert('hmrn.patient_antecedent', \%data);
# do radiotherapy/chemotherapy if either exist:
next DATA unless grep $vals->{$_}, qw(radiotherapy chemotherapy);
delete $data{event_id};
for my $tx( qw/radiotherapy chemotherapy/ ) {
next unless $vals->{$tx};
# $data{$_} = $vals->{$_};
$data{therapy} = $tx; # warn Dumper \%data;
$dbix4->insert('hmrn.patient_prior_therapies', \%data);
}
# $dbix4->insert('hmrn.patient_prior_therapies', \%data);
}
}
sub do_comment {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.patient_comments');
my $cols = $self->_get_cols('patient_comments');
my $query = $dbix3->query( q!select * from yhhn.comment! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{c_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my %data = ( patient_id => $hilis4_patient_id );
map $data{$_} = $vals->{$_}, @$cols;
$dbix4->insert('hmrn.patient_comments', \%data);
}
}
sub do_params {
my $self = shift;
my $dbh4 = $self->db->{dbh4};
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
my $data_tbl = 'hmrn.patient_params';
my $tmp_tbl = 'hmrn.temp';
$self->clear_table($data_tbl);
# create temp table:
$dbh4->do( qq!DROP TABLE IF EXISTS $tmp_tbl! );
$dbh4->do( qq!CREATE TABLE $tmp_tbl LIKE $data_tbl! );
my @tables = (
{ type => 'precursor', yhhn_table => 'acute', p_id => 'a_pid' },
{ type => 'myeloid', yhhn_table => 'myeloid', p_id => 'm_pid' },
{ type => 'lymphoid', yhhn_table => 'lymphoid', p_id => 'l_pid' },
{ type => 'plasmacell', yhhn_table => 'plasmacell', p_id => 'p_pid' },
);
$self->_do_data($_) for @tables;
# transfer data from temp to patient_params in patient_id order:
my $data = $dbix4->query( qq!select * from $tmp_tbl order by `patient_id`! );
while ( my $vals = $data->hash ) { # warn $vals->{request_id};
$dbix4->insert($data_tbl, $vals);
}
$dbh4->do( qq!DROP TABLE $tmp_tbl! );
}
sub _do_data {
my $self = shift;
my $cfg = shift;
my $type = $cfg->{type};
my $table = $cfg->{yhhn_table};
my $p_id = $cfg->{p_id};
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
my $fields = $self->_get_fields($type); # warn Dumper $fields;
my $query = $dbix3->query( qq!select * from yhhn.$table! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{$p_id};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my $time = $vals->{timestamp};
my %data = (
patient_id => $hilis4_patient_id,
time => $time,
);
while ( my ($col, $val) = each %$vals) {
next unless grep $col eq $_, keys %$fields; # only want data cols
$val ||= 'U' if grep $col eq $_,
qw(splenomegaly hepatomegaly transfusion ct bm sweats fever wt_loss);
$val ||= 'unknown' if grep $col eq $_,
qw(karyotype detection_spleen detection_liver);
next unless defined $val; # skip empty cols - but NOT zeros
{ # correct errors/omissions:
$val = '777.0' if $col eq 'cd34' && $val > 999;
$val = 99.9 if ( grep $col eq $_, qw(hb b2m) ) && $val > 99.9;
$val =~ s/\d(\d{3})/$1/ if $col eq 'bj_level'; # 4 digits to 3
$val = 'U' if $col eq 'stage' && $val eq 'NK';
$val = int($val) if $col eq 'albumin'; # should be int not decimal
}
my $param_id = $fields->{$col} or die "no param_id for $col";
my $insert = { %data, param_id => $param_id, result => $val };
# warn Dumper $insert;
$dbix4->insert('hmrn.temp', $insert);
}
}
}
sub do_staging {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table("hmrn.$_") for
( qw/patient_staging_site patient_staging_other patient_sites_other/ );
my $site_id_map = $self->site_id_map; # warn Dumper $site_id_map;
my %patients;
my %h = (
nodal => 'ln_pid',
extranodal => 'en_pid',
);
while ( my ($table, $key) = each %h ) {
my $query = $dbix3->query( qq!select * from yhhn.$table! );
DATA:
while ( my $vals = $query->hash ) { # warn Dumper $vals;
my $hilis3_pat_id = $vals->{$key};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
SITE:
for my $site (keys %$site_id_map) { # warn $site;
$vals->{$site} || next SITE;
my $site_id = $site_id_map->{$site}; # warn $site_id_map->{$site};
push @{ $patients{$hilis4_patient_id} }, $site_id;
}
if ( my $others = $vals->{other}) {
my $data = { patient_id => $hilis4_patient_id, details => $others };
$dbix4->insert('hmrn.patient_sites_other', $data);
}
OTHER:
for my $opt( qw/bulky extensive check_ct/ ) {
next OTHER unless $vals->{$opt};
my $data = { patient_id => $hilis4_patient_id, detail => $opt };
$dbix4->insert('hmrn.patient_staging_other', $data);
}
}
} # warn Dumper \%patients;
while ( my ($patient_id, $site_ids) = each %patients ) {
for my $site_id (@$site_ids) {
$dbix4->insert('hmrn.patient_staging_site',
{
patient_id => $patient_id,
site_id => $site_id,
}
);
}
}
}
sub do_treatments {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.patient_treatment'); # warn Dumper $self->patient_ids_map;
my $tx_detail_id_map = $self->tx_detail_id_map; # warn Dumper $tx_detail_id_map;
my $location_id_map = $self->location_id_map; # warn Dumper $location_id_map;
my $tx_type_id_map = $self->tx_type_id_map; # warn Dumper $tx_type_id_map;
my $query = $dbix3->query( q!select * from yhhn.treatment! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{tx_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $hilis3_pat_id";
next DATA;
}
my $location = $vals->{hospital} || next DATA; # warn Dumper $location; # a few nulls
my $location_id = $location_id_map->{$location}
|| warn "no location id for $location";
my $tx_type = $vals->{type};
my $tx_type_id = $tx_type_id_map->{$tx_type};
unless ($tx_type_id) {
# warn "no tx_type id for $tx_type"; # a few null lines in table
next DATA;
}
my %data = (
patient_id => $hilis4_patient_id,
location_id => $location_id,
tx_type_id => $tx_type_id,
start_date => $vals->{tx_start_date},
end_date => $vals->{tx_end_date},
response => $vals->{response},
timestamp => $vals->{timestamp},
);
if ( grep $tx_type eq $_, qw/chemotherapy steroids/, 'clinical trial' ) {
my $regimen = $vals->{regimen} or warn "no regimen for $vals->{id}"; # warn $regimen;
my $tx_detail_id = $tx_detail_id_map->{$tx_type}->{$regimen}
|| $self->_get_tx_detail_id($regimen,\%data); # also updates tx_type if changed
unless ($tx_detail_id) {
warn "no tx_detail id for $tx_type $regimen";
}
$data{tx_detail_id} = $tx_detail_id;
}
elsif (my $regimen = $vals->{regimen}) {
my $tx_detail_id = $tx_detail_id_map->{agent}->{$regimen};
$data{tx_detail_id} = $tx_detail_id if $tx_detail_id;
} # warn Dumper \%data;
$dbix4->insert('hmrn.patient_treatment', \%data);
}
}
sub do_mdt {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.patient_mdt_dates'); # warn Dumper $self->patient_ids_map;
my $query = $dbix3->query( q!select * from yhhn.mdt! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{mdt_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my %data = (
patient_id => $hilis4_patient_id,
date => $vals->{m_date},
# decision => $vals->{decision}, # not recording anymore
timestamp => $vals->{timestamp},
);
$dbix4->insert('hmrn.patient_mdt_dates', \%data);
}
}
sub _do_acute {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.acute_data');
my $cols = $self->_get_cols('acute_data');
my $query = $dbix3->query( q!select * from yhhn.acute! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{a_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my %data = ( patient_id => $hilis4_patient_id );
map $data{$_} = $vals->{$_}, @$cols;
$dbix4->insert('hmrn.acute_data', \%data);
}
}
sub _do_myeloid {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.myeloid_data');
my $cols = $self->_get_cols('myeloid_data');
my $query = $dbix3->query( q!select * from yhhn.myeloid! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{m_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my %data = ( patient_id => $hilis4_patient_id );
map $data{$_} = $vals->{$_}, @$cols;
# correct errors/omissions:
map $data{$_} ||= 'U', qw(splenomegaly hepatomegaly transfusion); # default
$data{cd34} = '777.0' if $data{cd34} && $data{cd34} > 999;
$dbix4->insert('hmrn.myeloid_data', \%data);
}
}
sub _do_plasmacell {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.plasmacell_data');
my $cols = $self->_get_cols('plasmacell_data');
my $query = $dbix3->query( q!select * from yhhn.plasmacell! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{p_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my %data = ( patient_id => $hilis4_patient_id );
map $data{$_} = $vals->{$_}, @$cols;
# correct errors/omissions:
map { $data{$_} = 99.9 if $data{$_} && $data{$_} > 99.9 } qw(hb b2m);
$data{bj_level} =~ s/\d(\d{3})/$1/; # 4 digits to 3
$dbix4->insert('hmrn.plasmacell_data', \%data);
}
}
sub _do_lymphoid {
my $self = shift;
my $dbix3 = $self->db->{dbix3};
my $dbix4 = $self->db->{dbix4};
$self->clear_table('hmrn.lymphoid_data');
my $cols = $self->_get_cols('lymphoid_data');
my $query = $dbix3->query( q!select * from yhhn.lymphoid! );
DATA:
while ( my $vals = $query->hash ) {
my $hilis3_pat_id = $vals->{l_pid};
my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id};
unless ($hilis4_patient_id) {
$self->orphaned_patient_ids->{$hilis3_pat_id}++;
# warn "no patient_id for $pat_id";
next DATA;
}
my %data = ( patient_id => $hilis4_patient_id );
map $data{$_} = $vals->{$_}, @$cols;
# correct errors/omissions:
map $data{$_} ||= 'U', qw(ct bm sweats fever wt_loss); # default
map { $data{$_} = 99.9 if $data{$_} && $data{$_} > 99.9 } qw(hb b2m);
$dbix4->insert('hmrn.lymphoid_data', \%data);
}
}
sub _build_patient_ids_map {
my $self = shift;
my $dbh = $self->db->{dbix4};
my $map = $dbh->query('select v3,v4 from hmrn._patient_id')->map;
return $map;
}
sub _build_site_id_map {
my $self = shift;
my $dbh = $self->db->{dbix4};
my $map = $dbh->query('select description,id from hmrn.staging_sites')->map;
my %h = (
left => 'l',
right => 'r',
);
# substitutions:
while ( my ($description,$id) = each %$map ) {
next unless $description =~ /_(left|right)\Z/;
delete $map->{$description};
$description =~ s/(.*)_(left|right)/$1_$h{$2}/;
$map->{$description} = $id;
} # warn Dumper $map;
return $map;
}
sub _build_location_id_map {
my $self = shift;
my $dbh = $self->db->{dbix4};
my $map = $dbh->query('select location, id from hmrn.locations')->map;
return $map;
}
sub _build_tx_type_id_map {
my $self = shift;
my $dbh = $self->db->{dbix4};
my $map = $dbh->query('select description, id from hmrn.treatment_types')->map;
return $map;
}
sub _build_tx_detail_id_map {
my $self = shift;
my $dbh = $self->db->{dbix4};
my $sql = q!select d.id, d.description, t.description as 'type' from
hmrn.treatment_details d join hmrn.treatment_types t on d.type_id = t.id!;
my %map;
my $tx = $dbh->query($sql);
while ( my $vals = $tx->hash ) { # warn Dumper $vals;
my $type = $vals->{type};
my $description = $vals->{description};
$map{$type}{$description} = $vals->{id};
}
return \%map;
}
sub _build_antecedent_map {
my $self = shift;
my $dbh = $self->db->{dbix4};
my $map = $dbh->query( q!select event,id from hmrn.antecedent_events! )->map;
return $map;
}
sub _get_fields {
my ($self, $category) = @_;
my $dbh = $self->db->{dbix4};
my $sql = q!select p.param_name, p.id from hmrn.parameters p
join hmrn.category_parameter cp on cp.parameter_id = p.id
join hmrn.categories c on cp.category_id = c.id
where category = ?!;
my $map = $dbh->query($sql,$category)->map;
return $map;
}
sub _get_cols {
my ($self, $table) = @_;
my $dbh = $self->db->{dbix4};
my $t = $dbh->query("show columns from hmrn.$table")->arrays;
# warn Dumper $t;
my @cols = grep { $_ ne 'patient_id' } map $_->[0], @$t;
# warn Dumper \@cols;
return \@cols;
}
sub _get_tx_detail_id {
my ($self, $regimen, $data) = @_;
my $tx_detail_id_map = $self->tx_detail_id_map; # warn Dumper $tx_detail_id_map;
my $tx_type_id_map = $self->tx_type_id_map; # warn Dumper $tx_type_id_map;
if ($regimen eq 'G-CSF') { # moved from chemo to supportive:
$data->{tx_type_id} = $tx_type_id_map->{'supportive care'};
return $tx_detail_id_map->{'supportive care'}->{$regimen};
}
elsif ($regimen eq 'Chelating agents') { # moved from chemo to supportive:
$data->{tx_type_id} = $tx_type_id_map->{'supportive care'};
return $tx_detail_id_map->{'supportive care'}->{$regimen};
}
elsif ($regimen =~ /^BMT/) { # moved from chemo to supportive:
$data->{tx_type_id} = $tx_type_id_map->{'stem cell transplant'};
return $tx_detail_id_map->{'stem cell transplant'}->{$regimen};
}
elsif ($regimen eq 'P32') { # most P32 listed as chemo:
$data->{tx_type_id} = $tx_type_id_map->{radiotherapy};
return $tx_detail_id_map->{radiotherapy}->{$regimen};
}
elsif ($regimen eq 'PUVA') { # most PUVA listed as chemo:
$data->{tx_type_id} = $tx_type_id_map->{'non-haematological'};
return $tx_detail_id_map->{'non-haematological'}->{$regimen};
}
}
1;