package YHHN; use Moose; with qw( Role::User Role::RebuildTables Role::ReferralSource ); use namespace::clean -except => 'meta'; use Data::Dumper; has $_ => (is => 'ro', isa => 'HashRef', required => 1) foreach qw( db sql ); has log_file => ( is => 'ro', required => 1 ); has $_ => ( is => 'rw', isa => 'HashRef[Str]', default => sub { {} }) foreach qw(nhs_numbers_map practice_ids orphaned_patient_ids); has $_ => ( is => 'ro', isa => 'HashRef', lazy_build => 1 ) foreach qw( tx_detail_id_map location_id_map patient_ids_map tx_type_id_map antecedent_map site_id_map ); __PACKAGE__->meta->make_immutable; $|++; # rebuilds: sub convert { my $self = shift; # $self->do_demographics_and_chronologies(); # *** must be run once to populate _patient_ids tbl # $self->do_treatments(); # $self->do_antecedent(); # $self->do_comment(); # $self->do_params(); $self->do_staging(); # $self->do_mdt(); # $self->do_history(); warn Dumper $self->orphaned_patient_ids; } sub do_demographics_and_chronologies { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; my $log_file = $self->log_file; for ( qw/hmrn._patient_id hmrn.patient_chronologies patient_demographics/ ) { $self->clear_table($_); } # get organisation_codes like '[^BCY]____' from referral_sources: my $practices_map = $self->referral_source_practices; # get distinct yhhn.gp_id's (ie org_code): my $yhhn_practices = $dbix3->query( q!select distinct(gp_id) from yhhn.demographics where gp_id is not null and gp_id <> 'B00000'! )->flat; # ensure all practices in yhhn.demographics exist in referral_sources: $self->add_new_practices_to_referral_sources($yhhn_practices); # return; # get referral_source.id for unknown practice: $dbix4->query(q!select id from referral_sources where organisation_code = 'V81999'!) ->into( my $unknown_practice_id); # warn $unknown_practice_id; my %null_nhsno; # get demographics table data: my $query = $dbix3->query( q!select * from yhhn.demographics! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_patient_id = $vals->{d_pid}; my $nhs_number = $dbix3->query( 'select NHSNo from PID where P_ID = ?', $hilis3_patient_id )->list; unless ($nhs_number) { $null_nhsno{$hilis3_patient_id}++; warn "no NHSNo for $vals->{d_pid}"; next DATA; # warn $nhs_number; } # skip if already seen nhs number: next DATA if $self->nhs_numbers_map->{$nhs_number}++; my $hilis4_patient_id = $dbix4->query( 'select id from patients where nhs_number = ?', $nhs_number )->list; unless ($hilis4_patient_id) { warn "no hilis4_patient_id for $nhs_number"; next DATA; # warn $nhs_number; } # add to patient_id => nhs_number_map: $dbix4->insert('hmrn._patient_id', { v3 => $hilis3_patient_id, v4 => $hilis4_patient_id } ); # $self->patient_ids_map->{$hilis3_patient_id} = $hilis4_patient_id; my $gp_code = $vals->{gp_id} || ''; $gp_code =~ s/\s//g; # trim blank spaces!! my $status = $vals->{dod} ? 'dead' : 'alive'; my %data = ( patient_id => $hilis4_patient_id, status => $status, time => $vals->{timestamp}, ); # add address & post_code if exists: map { $data{$_} = $vals->{$_} } grep $vals->{$_}, qw(address post_code dod); # add practice_id if exists (document if not): if ( my $practice_id = $practices_map->{$gp_code} ) { $data{practice_id} = $practice_id; } else { $data{practice_id} = $unknown_practice_id; $self->practice_ids->{$gp_code}++; # next DATA unless $practice_id; } $dbix4->insert('patient_demographics', \%data); { # patient dates: my %data = ( first_appointment => $vals->{first_app_date}, palliative_care => $vals->{palliative_date}, diagnosed => $vals->{diagnosed}, deceased => $vals->{dod}, patient_id => $hilis4_patient_id, ); $dbix4->insert('hmrn.patient_chronologies', \%data); } } warn Dumper $self->practice_ids; while ( my ($nhs_number, $freq) = each %{ $self->nhs_numbers_map } ) { next unless $freq > 1; warn $nhs_number, "\n"; } print $log_file $_, "\n" foreach sort keys %null_nhsno; # warn Dumper \%null_nhsno; # warn Dumper $self->patient_id_nhs_number_map; } sub do_history { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.history'); my $query = $dbix3->query( q!select * from yhhn.history! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = int $vals->{h_pid}; # unsigned zerofill !! my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } # get user.id from History.UserID: my $user_id = $self->_get_user_id(lc $vals->{userid}) || warn "no user_id for $vals->{userid}"; my %data = ( patient_id => $hilis4_patient_id, action => $vals->{action}, user_id => $user_id, time => $vals->{datetime}, ); $dbix4->insert('hmrn.history', \%data); } } sub do_antecedent { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; for ( qw/patient_antecedent patient_prior_therapies/ ) { $self->clear_table('hmrn.'.$_); } my $map = $self->antecedent_map; my $query = $dbix3->query( q!select * from yhhn.antecedent! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{an_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $hilis3_pat_id"; next DATA; } my $event = $vals->{event}; my $event_id = $map->{$event} || warn "no event_id for $event"; my %data = ( patient_id => $hilis4_patient_id, event_id => $event_id, timestamp => $vals->{timestamp}, ); $dbix4->insert('hmrn.patient_antecedent', \%data); # do radiotherapy/chemotherapy if either exist: next DATA unless grep $vals->{$_}, qw(radiotherapy chemotherapy); delete $data{event_id}; for my $tx( qw/radiotherapy chemotherapy/ ) { next unless $vals->{$tx}; # $data{$_} = $vals->{$_}; $data{therapy} = $tx; # warn Dumper \%data; $dbix4->insert('hmrn.patient_prior_therapies', \%data); } # $dbix4->insert('hmrn.patient_prior_therapies', \%data); } } sub do_comment { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.patient_comments'); my $cols = $self->_get_cols('patient_comments'); my $query = $dbix3->query( q!select * from yhhn.comment! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{c_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my %data = ( patient_id => $hilis4_patient_id ); map $data{$_} = $vals->{$_}, @$cols; $dbix4->insert('hmrn.patient_comments', \%data); } } sub do_params { my $self = shift; my $dbh4 = $self->db->{dbh4}; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; my $data_tbl = 'hmrn.patient_params'; my $tmp_tbl = 'hmrn.temp'; $self->clear_table($data_tbl); # create temp table: $dbh4->do( qq!DROP TABLE IF EXISTS $tmp_tbl! ); $dbh4->do( qq!CREATE TABLE $tmp_tbl LIKE $data_tbl! ); my @tables = ( { type => 'precursor', yhhn_table => 'acute', p_id => 'a_pid' }, { type => 'myeloid', yhhn_table => 'myeloid', p_id => 'm_pid' }, { type => 'lymphoid', yhhn_table => 'lymphoid', p_id => 'l_pid' }, { type => 'plasmacell', yhhn_table => 'plasmacell', p_id => 'p_pid' }, ); $self->_do_data($_) for @tables; # transfer data from temp to patient_params in patient_id order: my $data = $dbix4->query( qq!select * from $tmp_tbl order by `patient_id`! ); while ( my $vals = $data->hash ) { # warn $vals->{request_id}; $dbix4->insert($data_tbl, $vals); } $dbh4->do( qq!DROP TABLE $tmp_tbl! ); } sub _do_data { my $self = shift; my $cfg = shift; my $type = $cfg->{type}; my $table = $cfg->{yhhn_table}; my $p_id = $cfg->{p_id}; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; my $fields = $self->_get_fields($type); # warn Dumper $fields; my $query = $dbix3->query( qq!select * from yhhn.$table! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{$p_id}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my $time = $vals->{timestamp}; my %data = ( patient_id => $hilis4_patient_id, time => $time, ); while ( my ($col, $val) = each %$vals) { next unless grep $col eq $_, keys %$fields; # only want data cols $val ||= 'U' if grep $col eq $_, qw(splenomegaly hepatomegaly transfusion ct bm sweats fever wt_loss); $val ||= 'unknown' if grep $col eq $_, qw(karyotype detection_spleen detection_liver); next unless defined $val; # skip empty cols - but NOT zeros { # correct errors/omissions: $val = '777.0' if $col eq 'cd34' && $val > 999; $val = 99.9 if ( grep $col eq $_, qw(hb b2m) ) && $val > 99.9; $val =~ s/\d(\d{3})/$1/ if $col eq 'bj_level'; # 4 digits to 3 $val = 'U' if $col eq 'stage' && $val eq 'NK'; $val = int($val) if $col eq 'albumin'; # should be int not decimal } my $param_id = $fields->{$col} or die "no param_id for $col"; my $insert = { %data, param_id => $param_id, result => $val }; # warn Dumper $insert; $dbix4->insert('hmrn.temp', $insert); } } } sub do_staging { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table("hmrn.$_") for ( qw/patient_staging_site patient_staging_other patient_sites_other/ ); my $site_id_map = $self->site_id_map; # warn Dumper $site_id_map; my %patients; my %h = ( nodal => 'ln_pid', extranodal => 'en_pid', ); while ( my ($table, $key) = each %h ) { my $query = $dbix3->query( qq!select * from yhhn.$table! ); DATA: while ( my $vals = $query->hash ) { # warn Dumper $vals; my $hilis3_pat_id = $vals->{$key}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } SITE: for my $site (keys %$site_id_map) { # warn $site; $vals->{$site} || next SITE; my $site_id = $site_id_map->{$site}; # warn $site_id_map->{$site}; push @{ $patients{$hilis4_patient_id} }, $site_id; } if ( my $others = $vals->{other}) { my $data = { patient_id => $hilis4_patient_id, details => $others }; $dbix4->insert('hmrn.patient_sites_other', $data); } OTHER: for my $opt( qw/bulky extensive check_ct/ ) { next OTHER unless $vals->{$opt}; my $data = { patient_id => $hilis4_patient_id, detail => $opt }; $dbix4->insert('hmrn.patient_staging_other', $data); } } } # warn Dumper \%patients; while ( my ($patient_id, $site_ids) = each %patients ) { for my $site_id (@$site_ids) { $dbix4->insert('hmrn.patient_staging_site', { patient_id => $patient_id, site_id => $site_id, } ); } } } sub do_treatments { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.patient_treatment'); # warn Dumper $self->patient_ids_map; my $tx_detail_id_map = $self->tx_detail_id_map; # warn Dumper $tx_detail_id_map; my $location_id_map = $self->location_id_map; # warn Dumper $location_id_map; my $tx_type_id_map = $self->tx_type_id_map; # warn Dumper $tx_type_id_map; my $query = $dbix3->query( q!select * from yhhn.treatment! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{tx_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $hilis3_pat_id"; next DATA; } my $location = $vals->{hospital} || next DATA; # warn Dumper $location; # a few nulls my $location_id = $location_id_map->{$location} || warn "no location id for $location"; my $tx_type = $vals->{type}; my $tx_type_id = $tx_type_id_map->{$tx_type}; unless ($tx_type_id) { # warn "no tx_type id for $tx_type"; # a few null lines in table next DATA; } my %data = ( patient_id => $hilis4_patient_id, location_id => $location_id, tx_type_id => $tx_type_id, start_date => $vals->{tx_start_date}, end_date => $vals->{tx_end_date}, response => $vals->{response}, timestamp => $vals->{timestamp}, ); if ( grep $tx_type eq $_, qw/chemotherapy steroids/, 'clinical trial' ) { my $regimen = $vals->{regimen} or warn "no regimen for $vals->{id}"; # warn $regimen; my $tx_detail_id = $tx_detail_id_map->{$tx_type}->{$regimen} || $self->_get_tx_detail_id($regimen,\%data); # also updates tx_type if changed unless ($tx_detail_id) { warn "no tx_detail id for $tx_type $regimen"; } $data{tx_detail_id} = $tx_detail_id; } elsif (my $regimen = $vals->{regimen}) { my $tx_detail_id = $tx_detail_id_map->{agent}->{$regimen}; $data{tx_detail_id} = $tx_detail_id if $tx_detail_id; } # warn Dumper \%data; $dbix4->insert('hmrn.patient_treatment', \%data); } } sub do_mdt { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.patient_mdt_dates'); # warn Dumper $self->patient_ids_map; my $query = $dbix3->query( q!select * from yhhn.mdt! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{mdt_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my %data = ( patient_id => $hilis4_patient_id, date => $vals->{m_date}, # decision => $vals->{decision}, # not recording anymore timestamp => $vals->{timestamp}, ); $dbix4->insert('hmrn.patient_mdt_dates', \%data); } } sub _do_acute { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.acute_data'); my $cols = $self->_get_cols('acute_data'); my $query = $dbix3->query( q!select * from yhhn.acute! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{a_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my %data = ( patient_id => $hilis4_patient_id ); map $data{$_} = $vals->{$_}, @$cols; $dbix4->insert('hmrn.acute_data', \%data); } } sub _do_myeloid { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.myeloid_data'); my $cols = $self->_get_cols('myeloid_data'); my $query = $dbix3->query( q!select * from yhhn.myeloid! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{m_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my %data = ( patient_id => $hilis4_patient_id ); map $data{$_} = $vals->{$_}, @$cols; # correct errors/omissions: map $data{$_} ||= 'U', qw(splenomegaly hepatomegaly transfusion); # default $data{cd34} = '777.0' if $data{cd34} && $data{cd34} > 999; $dbix4->insert('hmrn.myeloid_data', \%data); } } sub _do_plasmacell { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.plasmacell_data'); my $cols = $self->_get_cols('plasmacell_data'); my $query = $dbix3->query( q!select * from yhhn.plasmacell! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{p_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my %data = ( patient_id => $hilis4_patient_id ); map $data{$_} = $vals->{$_}, @$cols; # correct errors/omissions: map { $data{$_} = 99.9 if $data{$_} && $data{$_} > 99.9 } qw(hb b2m); $data{bj_level} =~ s/\d(\d{3})/$1/; # 4 digits to 3 $dbix4->insert('hmrn.plasmacell_data', \%data); } } sub _do_lymphoid { my $self = shift; my $dbix3 = $self->db->{dbix3}; my $dbix4 = $self->db->{dbix4}; $self->clear_table('hmrn.lymphoid_data'); my $cols = $self->_get_cols('lymphoid_data'); my $query = $dbix3->query( q!select * from yhhn.lymphoid! ); DATA: while ( my $vals = $query->hash ) { my $hilis3_pat_id = $vals->{l_pid}; my $hilis4_patient_id = $self->patient_ids_map->{$hilis3_pat_id}; unless ($hilis4_patient_id) { $self->orphaned_patient_ids->{$hilis3_pat_id}++; # warn "no patient_id for $pat_id"; next DATA; } my %data = ( patient_id => $hilis4_patient_id ); map $data{$_} = $vals->{$_}, @$cols; # correct errors/omissions: map $data{$_} ||= 'U', qw(ct bm sweats fever wt_loss); # default map { $data{$_} = 99.9 if $data{$_} && $data{$_} > 99.9 } qw(hb b2m); $dbix4->insert('hmrn.lymphoid_data', \%data); } } sub _build_patient_ids_map { my $self = shift; my $dbh = $self->db->{dbix4}; my $map = $dbh->query('select v3,v4 from hmrn._patient_id')->map; return $map; } sub _build_site_id_map { my $self = shift; my $dbh = $self->db->{dbix4}; my $map = $dbh->query('select description,id from hmrn.staging_sites')->map; my %h = ( left => 'l', right => 'r', ); # substitutions: while ( my ($description,$id) = each %$map ) { next unless $description =~ /_(left|right)\Z/; delete $map->{$description}; $description =~ s/(.*)_(left|right)/$1_$h{$2}/; $map->{$description} = $id; } # warn Dumper $map; return $map; } sub _build_location_id_map { my $self = shift; my $dbh = $self->db->{dbix4}; my $map = $dbh->query('select location, id from hmrn.locations')->map; return $map; } sub _build_tx_type_id_map { my $self = shift; my $dbh = $self->db->{dbix4}; my $map = $dbh->query('select description, id from hmrn.treatment_types')->map; return $map; } sub _build_tx_detail_id_map { my $self = shift; my $dbh = $self->db->{dbix4}; my $sql = q!select d.id, d.description, t.description as 'type' from hmrn.treatment_details d join hmrn.treatment_types t on d.type_id = t.id!; my %map; my $tx = $dbh->query($sql); while ( my $vals = $tx->hash ) { # warn Dumper $vals; my $type = $vals->{type}; my $description = $vals->{description}; $map{$type}{$description} = $vals->{id}; } return \%map; } sub _build_antecedent_map { my $self = shift; my $dbh = $self->db->{dbix4}; my $map = $dbh->query( q!select event,id from hmrn.antecedent_events! )->map; return $map; } sub _get_fields { my ($self, $category) = @_; my $dbh = $self->db->{dbix4}; my $sql = q!select p.param_name, p.id from hmrn.parameters p join hmrn.category_parameter cp on cp.parameter_id = p.id join hmrn.categories c on cp.category_id = c.id where category = ?!; my $map = $dbh->query($sql,$category)->map; return $map; } sub _get_cols { my ($self, $table) = @_; my $dbh = $self->db->{dbix4}; my $t = $dbh->query("show columns from hmrn.$table")->arrays; # warn Dumper $t; my @cols = grep { $_ ne 'patient_id' } map $_->[0], @$t; # warn Dumper \@cols; return \@cols; } sub _get_tx_detail_id { my ($self, $regimen, $data) = @_; my $tx_detail_id_map = $self->tx_detail_id_map; # warn Dumper $tx_detail_id_map; my $tx_type_id_map = $self->tx_type_id_map; # warn Dumper $tx_type_id_map; if ($regimen eq 'G-CSF') { # moved from chemo to supportive: $data->{tx_type_id} = $tx_type_id_map->{'supportive care'}; return $tx_detail_id_map->{'supportive care'}->{$regimen}; } elsif ($regimen eq 'Chelating agents') { # moved from chemo to supportive: $data->{tx_type_id} = $tx_type_id_map->{'supportive care'}; return $tx_detail_id_map->{'supportive care'}->{$regimen}; } elsif ($regimen =~ /^BMT/) { # moved from chemo to supportive: $data->{tx_type_id} = $tx_type_id_map->{'stem cell transplant'}; return $tx_detail_id_map->{'stem cell transplant'}->{$regimen}; } elsif ($regimen eq 'P32') { # most P32 listed as chemo: $data->{tx_type_id} = $tx_type_id_map->{radiotherapy}; return $tx_detail_id_map->{radiotherapy}->{$regimen}; } elsif ($regimen eq 'PUVA') { # most PUVA listed as chemo: $data->{tx_type_id} = $tx_type_id_map->{'non-haematological'}; return $tx_detail_id_map->{'non-haematological'}->{$regimen}; } } 1;