#!/usr/bin/env perl =begin ------------------------------------------------------------------------- NGIS data extraction; require ngis-diagnosis.pl & ngis-test-directory.pl running 1st maps hilis4 lab_test_id to NGIS test id using hilis4.ngis_lab_test table: if lab_test_id entry in ngis_lab_test table: * if single row then use ngis_test_code (diagnosis doesn't matter so request doesn't need to be reported) * if lab_test has multiple ngis_test_codes, diagnosis.ngis_indiation_id is used to determine which (if any) ngis_test_code to use, where the diagnosis clinical indication must match one of the ngis test id's (eg HTS myeloid panel has matching codes for MDS, MPN, etc but not for CML) * if NGIS code cannot be found on diagnosis, using loose matching on presentation eg 'MDS' matches Suspected MDS, Follow-up MDS * last resort is to force NGIS code onto some test names (eg HTS myeloid) complications (see geneq_data_extract.log): * many hilis4 lab-tests do not yet have ngis test id's * many lab tests do not have a clinical indication mapped to the diagnosis * many diagnostic terms do not have any clinical indications (eg 'see comments', 'lab tests only', etc) * some requests not reported when data collected eg: multiple NGIS ids for 'HTS myeloid' and 'See comments' has no clinical indication id =cut #------------------------------------------------------------------------------- use Getopt::Std; getopts('m:tq'); # months, testing, query output our($opt_m,$opt_t,$opt_q); # warn $opt_t; exit; use strict; use warnings; use feature 'say'; my $JUST_TESTING = $opt_t || 0; # dumps xls file in /tmp only #============================================================================== my $duration = $opt_m || 1; # months; #============================================================================== use lib '/home/raj/perl5/lib/perl5'; use IO::All; use Data::Printer; use Data::Dumper; use SQL::Abstract::More; use List::Util qw(maxstr); use Spreadsheet::WriteExcel::Simple; use Time::HiRes qw(gettimeofday tv_interval); use List::MoreUtils qw(all any); # use Array::Utils qw(:all); use FindBin qw($Bin); # warn $Bin; use lib $Bin . '/../../lib'; use LIMS::Local::ScriptHelpers; use lib '/home/raj/perl-lib'; use Local::DB; my $tools = LIMS::Local::ScriptHelpers->new(); $tools->test_only($JUST_TESTING); my $today = $tools->time_now; my $dbix = Local::DB->dbix({ dbname => 'hilis4' }); $dbix->lc_columns = 0; # preserve mixed case on col names (now standardised) $Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col) my $log = $Bin . '/geneq_data_extract.log'; io($log)->print; # reset my %STASH; # for logging/debugging #------------------------------------------------------------------------------- my $filename = $Bin.'/geneq_data.xls'; my $ref_date = $today->clone->subtract( months => 1 ); # warn $ref_date; # check cytogenetics my @lab_sections = ( 'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing', 'Multiplex Ligation-dependent Probe Amplification' ); my @excluded_lab_tests = qw( cell_selection_quality 100kg_dna_extraction sanger_sequencing refer_material h_and_e_fish fish_h_and_e dna_control abl_control ); # may need to exclude D/RNA extractions: # push @excluded_lab_tests, qw( dna_extraction rna_extraction cd138_dna ); my @excluded_parent_codes = ( 'RWM', # Cardiff 'RVA', # Carmarthen 'RT8', # Conwy 'RVF', # Gwent 'RT9', # NE Wales 'RT7', # NW Wales 'RVC', # Swansea 'RQF', # Velidre, Wales '8EQ15', # Drs Lab 'MSDMDH', # Mater Dei 'X99999', 'V81999' # unknowns ); # look for loose match with presentation term: my %presentation_types = ( AML => 'M80', CML => 'M84', MDS => 'M82', MPN => 'M85', PNH => 'M82', Hodgkin => 'M95', 'B-ALL' => 'M91', myeloma => 'M92', # case-sensitive Aplastic => 'M82', Cytopenia => 'M82', # MDS ? check this 'HTS myeloid' => 'M82', # MDS #'B-LPD' => 'M93', # none configured #'Suspected lymphoma' => 'M93', # none configured ); # lab-tests done on RNA, rest done on DNA: =begin # using linked_lab_tests table now my %rna_tests = map +($_ => 1), qw( abl1_kinase_p190 abl1_kinase_p210 abl1_kinase_p230 bcr_abl_multiplex bcr_abl_p190 bcr_abl_p210 cbf_beta-myh11 ckit rq_cml cml_rq_b2a3_b3a3 etv6_runx1 pml_rara_bcl1 pml_rara_bcl3 runx1_runx1t1 ); # p %rna_tests; =cut my $rna_tests = get_rna_tests(); # p $rna_tests; # get panel_lab_tests: my $panel_lab_test = get_panel_lab_tests(); # p $panel_lab_test; my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests; my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests; #------------------------------------------------------------------------------- my ($sql, @bind) = _get_query(); # p $sql; p @bind; my $t0 = [gettimeofday]; my $query = $dbix->query( $sql, @bind ); # get cols from query, except 'private' ones used for evaluations (eg _datetime_screened): my @headers = grep $_ !~ /^_/, $query->columns; # p @headers; my @data = $query->hashes; runtimer('query runtime'); my $xl = Spreadsheet::WriteExcel::Simple->new; $xl->write_bold_row(\@headers); runtimer('commencing data processing'); #------------------------------------------------------------------------------- # get request_ids and DNA/RNA extraction dates into hash map: my @request_ids = map $_->{internal_request_id}, @data; =begin # done using db lookup now for my $ref(@data) { if ( $ref->{test_name} =~ /^([DR]NA) extraction$/ ) { my $xna_type = $1; my $datetime = $ref->{_date_test_completed}; my $req_id = $ref->{internal_request_id}; $extraction_dates{$req_id}{$xna_type} = $datetime; # remove row: $ref = undef; } } # p %extraction_dates; # p @data; exit; =cut my $extraction_dates = get_extraction_dates_for_requests(\@request_ids); # p $extraction_dates; exit; #------------------------------------------------------------------------------- # pre-process data to reconstitute molecular panels: @data = reconstitute_molecular_panels(\@data); # p @data; # process @data: for my $ref (@data) { my $data = process_data($ref); # returns arrayref $xl->write_row($data); } runtimer('completed data processing'); $xl->save($filename); # p %STASH; my $no_ngis_code = $STASH{no_ngis_code}; my $unreported = $STASH{not_reported}; my $no_clinical_indication = $STASH{no_clinical_indication}; my $no_available_ngis_code = $STASH{no_available_ngis_code}; io($log)->appendln('=' x 10 . ' DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION ' . '=' x 10); io($log)->appendln($_) for @$no_clinical_indication; io($log)->appendln('=' x 10 . ' PRESENTATION WITH NO CLINICAL INDICATION ' . '& DIAGNOSIS WITH NO AVAILABLE NGIS CODE ' . '=' x 10); io($log)->appendln($_) for @$no_available_ngis_code; if ($unreported) { io($log)->appendln('=' x 40 . 'UNREPORTED REQUESTS' . '=' x 40); io($log)->appendln($_) for @$unreported; } io($log)->appendln('=' x 40 . 'LAB TESTS WITH NO NGIS ID' . '=' x 40); io($log)->appendln(join ': ', $_, $no_ngis_code->{$_}) for sort keys %{ $no_ngis_code }; #=============================================================================== sub runtimer { say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday] } sub process_data { my $ref = shift; # href # NGIS test code: $ref->{test_id} = get_ngis_test_code($ref); # determine date lab-test requested: $ref->{datetime_test_requested} = get_test_request_date($ref); # set datetime_testing_complete to test completion datetime: $ref->{datetime_testing_complete} = $ref->{_date_test_completed}; { # set datetime_reporting_complete to later of testing_complete or authorisation: if ( $ref->{_date_authorised} ) { my @dates = @{$ref}{ qw/_datetime_authorised _date_test_completed/ }; # dates in yyyy-mm-dd format can be handled as strings for determination # of chronology using List::Util::maxstr or could do $dateA gt $dateB: $ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util } else { # not authorised, set to _date_test_completed: $ref->{datetime_reporting_complete} = $ref->{_date_test_completed} } } =begin # datetime_processing_complete now set to D/RNA extraction date # set datetime_processing_complete to later of authorisation date or test # completion date; value will be null if request not yet authorised: # p $ref->{sample_processed_date}; p $ref->{_date_authorised}; if ( $ref->{_date_authorised} ) { my @dates = @{$ref}{ qw/_date_authorised datetime_testing_complete/ }; # dates in yyyy-mm-dd format can be handled as strings for determination # of chronology using List::Util::maxstr or could do $dateA gt $dateB: $ref->{datetime_processing_complete} = maxstr(@dates); # List::Util } # else sample_completed_date remains null =cut $ref->{datetime_processing_complete} = get_extraction_date($ref); =begin # new template wants some cols temporarily NULL (commented out in query now): my @null_fields = qw/ testing_laboratory patient_first_name patient_last_name patient_dob nhs_number patient_address patient_postcode referrer_last_name referring_facility_name unit_number referral_reason time_sample_collected infection_risk sample_units sample_amount dna_concentration sample_tube_id sample_rack_id /; $ref->{$_} = undef for @null_fields; =cut return [ @{$ref}{@headers} ]; } sub _get_query { my @cols = ( 'CONCAT_WS("/", r.request_number, r.year - 2000)|local_sample_id', # 'NULL|local_sample_id_sub', # 'NULL|local_dna_number', # 'NULL|local_dna_number_sub', 'p.id|internal_patient_id', 'r.id|internal_request_id', q!"GeNEQ Leeds (HILIS)"|booking_laboratory!, q!"GeNEQ Leeds (HILIS)"|processing_laboratory!, q!"GeNEQ Leeds (HILIS)"|testing_laboratory!, q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!, # 'NULL|ped_number', # 'p.first_name|patient_first_name', # 'p.last_name|patient_last_name', # 'p.dob|patient_dob', 'p.gender|patient_sex', # 'p.nhs_number', # 'pd.address|patient_address', # 'NULL|patient_district_of_residence', 'NULL|patient_city', # 'pd.post_code|patient_postcode', # 'NULL|patient_country', 'ref.national_code|referrer_ods_code', # 'NULL|referrer_email', # 'NULL|referrer_telephone', # 'NULL|referrer_title', # 'NULL|referrer_first_name', # 'ref.name|referrer_last_name', 'hd.display_name|referrer_specialty', # 'rs.display_name|referring_facility_name', # 'NULL|referrer_address', # 'NULL|referrer_city', # 'NULL|referrer_district', # 'NULL|referrer_postcode', # 'NULL|referrer_county', 'rs.organisation_code|referring_facility_ods_code', # 'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b # 'NULL|other_unit_number', # 'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b q!"Routine"|test_priority!, 'NULL|test_id', # NGIS test id (eg M80.6) 'lt.field_label|test_name', 'NULL|ngis_id', # NGIS order number ? 'NULL|datetime_test_requested', # date lab-test requested - calculated later # 'lt.test_type|request_comments', # panel or test q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ') AS sample_type_received!, 'rsd.specimen_date|datetime_sample_collected', # datetime 'r.created_at|datetime_sample_received', # datetime 'r.created_at|datetime_order_received', # datetime 'NULL|datetime_processing_complete', # set later 'NULL|datetime_testing_complete', # set later 'NULL|datetime_reporting_complete', # set later 'rsd.specimen_quality|sample_condition', # q!MAX( CASE WHEN ao.option_name = 'doi' # THEN 'yes' ELSE 'no' END ) AS infection_risk!, # 'NULL|sample_type_dispatched', # 'IF( rst.volume, "ul", NULL )|sample_units', # 'rst.volume|sample_amount', # q!IF( rst.concentration < 9999, rst.concentration, NULL ) # AS dna_concentration!, # 'NULL|sample_date_sent', # 'rst.vialId|sample_tube_id', # 'sr.plateId|sample_rack_id', # 'NULL|sample_courier', # 'NULL|sample_courier_ref', # 'NULL|sample_processed_date', # renamed datetime_processing_complete # 'NULL|sample_completed_date', # renamed datetime_testing_complete # fields used for data processing only (underscored for omission in @headers): 'lt.id|_lab_test_id', 'lt.test_name|_test_name', 'ls.section_name|_section_name', 'd.name|_diagnosis', 'nci.code|_clinical_indication_code', 's2.description|_presentation', 'rrv.diagnosis_id|_diagnosis_id', q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END ) AS _datetime_screened!, # q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END ) # AS _datetime_reported!, # not used q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END ) AS _datetime_authorised!, 'ts.time|_datetime_test_completed', # date lab-test set to complete ); my @rels = ( 'requests|r' , q{r.patient_case_id=pc.id} => 'patient_case|pc' , q{pc.patient_id=p.id} => 'patients|p' , q{pc.referral_source_id=rs.id} => 'referral_sources|rs' , q{rs.parent_organisation_id=po.id} => 'parent_organisations|po' , q{r.referrer_department_id=rd.id} => 'referrer_department|rd' , q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd' , q{rd.referrer_id=ref.id} => 'referrers|ref' , q{rsd.request_id=r.id} => 'request_specimen_detail|rsd' , q{rsp.request_id=r.id} => 'request_specimen|rsp' , q{rsp.specimen_id=s.id} => 'specimens|s' , q{ts.request_id=r.id} => 'request_lab_test_status|ts' , q{ts.status_option_id=so.id} => 'lab_test_status_options|so' , q{ts.lab_test_id=lt.id} => 'lab_tests|lt' , q{lt.lab_section_id=ls.id} => 'lab_sections|ls' , q{rsv.request_id=r.id} => 'request_status_view|rsv' , q{ris.request_id=r.id} => 'request_initial_screen|ris' , q{ris.screen_id=s2.id} => 'screens|s2' , # left joins: q{=>rrv.request_id=r.id} => 'request_report_view|rrv' , q{=>rrv.diagnosis_id=d.id} => 'diagnoses|d' , q{=>d.ngis_indication_id=nci.id} => 'ngis_clinical_indications|nci', q{=>ro.request_id=r.id} => 'request_option|ro' , q{=>ro.option_id=ao.id} => 'additional_options|ao' , q{=>pd.patient_id=p.id} => 'patient_demographics|pd' , q{=>rst.request_id=r.id} => 'request_storage|rst' , q{=>rst.rack_id=sr.id} => 'storage_racks|sr' , q{=>rt.request_id=r.id} => 'request_trial|rt' , ); my %where = ( -and => [ # repeated elements (test_name, parent_code), requires arayref 'rt.request_id' => undef, # not in clinical trial 'so.description' => 'complete', # lab-test status 'MONTH(ts.time)' => $ref_date->month, # when test status = complete 'YEAR(ts.time)' => $ref_date->year, # when test status = complete 'ls.section_name' => { -in => \@lab_sections }, 'rs.organisation_code' => { -not_rlike => '^NT' }, # private hospital 'po.parent_code' => { -not_rlike => '^(S|Z)' }, # scotland/ireland 'po.parent_code' => { -not_in => \@excluded_parent_codes }, 'lt.test_name' => { -not_in => \@excluded_lab_tests }, 'lt.test_name' => { -not_rlike => 'quantification|store_[dr]na' }, ], ); my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!; my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, # -where => { 'r.id' => [280442,] }, -group_by => [ 'lt.id', 'r.id' ], -having => { $is_private => undef }, # not private patient -order_by => 'r.id', #-limit => 100, #-offset => 100, ); # p @args; # need to supply own injection_guard to override built-in sql injection attack # prevention - detects ';' in group_concat; modified from SQL::Abstract new() my @args_to_new = ( injection_guard => qr/^ \s* go \s/mi ); my ($sql, @bind) = SQL::Abstract::More->new(@args_to_new)->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $opt_q; # exit; return ($sql, @bind); } sub get_extraction_dates_for_requests { my $request_ids = shift; my @cols = ( 'ts.request_id', q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/, 'ts.time', ); my @rels = ( 'request_lab_test_status|ts', 'ts.lab_test_id=lt.id' => 'lab_tests|lt' , 'ts.status_option_id=so.id' => 'lab_test_status_options|so', ); my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna ); my %where = ( 'so.description' => 'complete', 'lt.test_name' => { -in => \@extraction_tests }, 'ts.request_id' => { -in => $request_ids }, ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $opt_q; # exit; my $query = $dbix->query( $sql, @bind ); my %h; while ( my $ref = $query->hash ) { my $request_id = $ref->{request_id}; my $datetime = $ref->{time}; my $type = $ref->{type}; $h{$request_id}{$type} = $datetime; } return \%h; } sub get_extraction_date { my $ref = shift; # FISH tests not done on D/RNA extraction so can't get a processing date: return if $ref->{_section_name} eq 'FISH'; my $test_name = $ref->{_test_name}; my $lab_num = $ref->{local_sample_id}; my $req_id = $ref->{internal_request_id}; # get extraction dates for DNA and/or RNA: my $extraction = $extraction_dates->{$req_id}; say "no extraction for $lab_num $test_name" and return unless $extraction; # get expected material, RNA if test_name in %rna_tests, otherwise assume # DNA, except a small minority of tests done on CD138-selected DNA (see below): my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA'; # get extraction date for test material: my $date = $extraction->{$material}; # some tests (eg MLPA myeloma) are done on CD138-selected DNA - # key = CD138DNA in %extraction_dates: $date ||= $extraction->{CD138DNA}; # or give up - no extraction date available: say "no extraction date for $lab_num $test_name $material" and return unless $date; return $date; } sub get_test_request_date { my $ref = shift; # p $ref; my $request_id = $ref->{internal_request_id}; my $registered = $ref->{datetime_sample_received}; my $test_name = $ref->{test_name}; my $screened = $ref->{_datetime_screened}; # use manual request date if exists, or auto-request date, or date of screening: my $manual_request_date = get_manual_test_request($request_id, $test_name); # auto-requested by sample-type, therefore date = registration: my $auto_request_date = $registration_lab_tests->{$test_name} ? $registered : 0; # zero OK as will be tested for truth below # p [$manual_request_date, $auto_request_date, $screened]; return $manual_request_date || $auto_request_date || $screened; } sub get_registration_lab_test_requests { my @cols = ( 'DISTINCT t2.field_label', 1 ); my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2'); my @args = ( -columns => \@cols, -from => [ -join => @rels ], ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $opt_q; # exit; return $dbix->query($sql, @bind)->map; } sub get_rna_tests { my @rels = ( 'linked_lab_test|llt' , 'llt.parent_test_id=lt1.id' => 'lab_tests|lt1' , 'llt.linked_test_id=lt2.id' => 'lab_tests|lt2' , ); my @args = ( -columns => [ 'lt1.test_name', 1 ], -from => [ -join => @rels ], -where => { 'lt2.test_name' => 'rna_extraction' }, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $opt_q; # exit; my $map = $dbix->query( $sql, @bind )->map; # add rna_extraction in case extraction tests are required in data return: $map->{rna_extraction} = 1; return $map; } sub get_manual_test_request { my ($request_id, $test_name) = @_; my @args = ( -columns => [ 'time' ], -from => [ 'request_lab_test_history' ], -where => { request_id => $request_id, action => { -rlike => '^(auto-)?requested( linked test)? ' . $test_name }, }, -order_by => [ '-time' ], # ORDER BY time DESC -limit => 1, # returns most recent only if >1 ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; # $dbix->dump_query($sql, @bind) if $opt_q; # exit; return $dbix->query($sql, @bind)->value; } sub get_ngis_test_code { my $ref = shift; my $presentation = $ref->{_presentation}; my $diagnosis_id = $ref->{_diagnosis_id}; my $lab_test_id = $ref->{_lab_test_id}; my $lab_section = $ref->{_section_name}; my $test_name = $ref->{test_name}; my $diagnosis = $ref->{_diagnosis} || ''; my $lab_num = $ref->{local_sample_id}; # get NGIS code(s) for this lab-test: my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref unless ($ngis_test_code) { my $tile = "$test_name [$lab_section]"; $STASH{no_ngis_code}{$tile}++; # say "$lab_num: no NGIS id for $test_name"; return undef; } # return NGIS test code if it's the only one (even if not supported clinical # indication; TODO: this might not be suitable, might need to consider diagnosis): return $ngis_test_code->[0] if scalar @$ngis_test_code == 1; # now have multiple NGIS id's for lab-test # common message for debugging: my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!; my $str = "$lab_num: $test_name [diagnosis: $diagnosis;" . " presentation: $presentation]"; # vars for try_presentation(): my @vars_for_presentation = ($test_name, $presentation, $ngis_test_code); # now need to examine diagnosis: if (! $diagnosis_id) { if ( my $ngis_code = try_presentation(@vars_for_presentation) ) { # say "found $code from $presentation" return $ngis_code; } # io($log)->appendln("$msg and request is not reported"); push @{ $STASH{not_reported} }, $str and return undef; } # get clinical indication for diagnosis (eg AML NOS = M80) my $indication = $ref->{_clinical_indication_code}; # p $indication; if (! $indication) { # say 'no clinical indication'; if ( my $ngis_code = try_presentation(@vars_for_presentation) ) { # say "found $ngis_code from $presentation"; return $ngis_code; } # $STASH{no_clinical_indication}{$diagnosis}++; #io($log)->appendln("$msg and '$diagnosis' has no clinical indication id" # . " [presentation: $presentation]"); push @{ $STASH{no_clinical_indication} }, $str and return undef; } # p $indication; p $ngis_ids; # return NGIS code from list if has a matching clinical indication: for my $ngis_code (@$ngis_test_code) { # say "$lab_num: $indication -> $ngis_code" if $ngis_code =~ /^$indication/; return $ngis_code if $ngis_code =~ /^$indication/; } # last resort is to try to find NGIS code from presentation: if ( my $ngis_code = try_presentation(@vars_for_presentation) ) { # say "found $code from $presentation"; return $ngis_code; } # can't find an NGIS code for lab-test from presentation or diagnosis: #io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] " # . "did not match any available NGIS test codes [presentation: $presentation]"); # say "no NGIS id available for $test_name"; push @{ $STASH{no_available_ngis_code} }, "$lab_num: $test_name [diagnosis indication $indication [$diagnosis]; " . "presentation: $presentation]"; return undef; } sub try_presentation { # try to look for a loose match with presentation term: my ($test_name, $presentation, $ngis_test_code) = @_; # str, arrayref, str # p $presentation; p $ngis_test_code; p $test_name; for my $type ( keys %presentation_types ) { # p $type; if ( $presentation =~ /$type/ ) { # eg Suspected MDS =~ /MDS/ # say "$presentation matches $type"; for my $ngis_code (@$ngis_test_code) { # p $ngis_code; p $presentation_types{$type}; if ( $ngis_code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/ # say "$ngis_code matches $presentation_types{$type}"; return $ngis_code; } } } } { # if we're still here, no other match possible - try to force NGIS code: return $presentation_types{'HTS myeloid'} if $test_name eq 'HTS myeloid'; } return undef; } sub get_ngis_lab_tests { my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]); my %h; while ( my $ref = $q->array ) { # p $ref; my $lab_test_id = $ref->[0]; push @{ $h{$lab_test_id} }, $ref->[1]; } return \%h; } sub reconstitute_molecular_panels { my $data = shift; # p $data; # new data file: my @new; # keep track of request_id: my $last_request_id = 0; # holder for molucular tests: my @tests = (); # will be destroyed and recreated after each request ROW: for my $row (@$data) { my $lab_section = $row->{_section_name}; my $request_id = $row->{internal_request_id}; # if new request, handle molecular tests data, if any: if ( $last_request_id && $request_id != $last_request_id ) { if ( @tests ) { my @rows = molecular_tests_to_panels(\@tests); # p @rows; push @new, @rows; @tests = (); # reset for next request } } push @new, $row and next ROW if $lab_section ne 'Molecular'; # have molecular data row: my $test_name = $row->{test_name}; push @tests, $row; # set last_request_id to current request_id: $last_request_id = $request_id; } # p %tests; return wantarray ? @new : \@new; } sub molecular_tests_to_panels { my $ref = shift; # AoH's my @test_names = map $_->{test_name}, @$ref; # say 'test_names:'; p @test_names; # panels ordered in size of array, largest 1st to prevent matches with # smaller panels having common members (eg AML 60+ & AML under-60): PANEL: for my $t (@$panel_lab_test) { # eg AML 60+ => [NPM1, FLT3 ITD] my ($panel_name, $panel_tests) = @$t; # p @$panel_tests; # get only those array elements that exist in A and do not exist in B # warn $panel_name if intersect(@tests, @$panel_tests); # TODO: this is wrong - see H29519/18 - matches 'CD5+ BLPD RQ' but only has 1 matching probe # if ( my @set = intersect(@test_names, @$panel_tests) ) { # say 'matching set:'; p @set; # my @remainder = array_minus(@test_names, @$panel_tests); p @remainder; my $matching_panel = all { my $panel_test = $_; any {$_ eq $panel_test} @test_names; } @$panel_tests; if ( $matching_panel ) { my @set = @$panel_tests; # remove 1st element in @set into var: my $first_element_of_set = shift @set; # say "first_element_of_set: $first_element_of_set"; # my %seen = (); # only delete test name once (using DELETE_ME flag now) # for test_name of 1st entry in $ref matching 1st element of @set, # change it to the matched panel_name & delete the rest: DATA: for my $d (@$ref) { # p $d; # data href next DATA if $d->{DELETE_ME}; # already flagged for deletion my $test_name = $d->{test_name}; # p $test_name; # say "testing whether $test_name eq $first_element_of_set"; if ( $test_name eq $first_element_of_set ) { # say "changing $test_name to $panel_name"; $d->{test_name} = $panel_name; } elsif ( grep $test_name eq $_, @set ) { # say "$test_name in @set"; # say 'seen:'; p %seen; # set a flag for later: $d->{DELETE_ME}++; # say "flagging $test_name data for deletion"; # unless $seen{$test_name}; # undef $d unless $seen{$test_name}; # only delete once # $seen{$test_name}++; # should be done above but doesn't work!! } } } } # p $ref; # return original hashrefs not flagged for deletion: my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data; return wantarray ? @data : \@data; } sub get_panel_lab_tests { my @cols = ( 'lt1.field_label|panel', 'lt2.field_label|test', ); my @rels = ( 'panel_lab_test|plt' , 'plt.panel_test_id=lt1.id' => 'lab_tests|lt1' , 'plt.lab_test_id=lt2.id' => 'lab_tests|lt2' , 'lt1.lab_section_id=ls.id' => 'lab_sections|ls' , ); # need to *locally* exclude DNA extraction, if not already excluded: my @local_excluded_tests = @excluded_lab_tests; # copy to keep local push @local_excluded_tests, 'dna_extraction' unless grep { $_ eq 'dna_extraction' } @local_excluded_tests; my %where = ( 'ls.section_name' => 'Molecular', 'lt2.test_name' => { -not_in => \@local_excluded_tests }, ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $opt_q; # exit; my $query = $dbix->query( $sql, @bind ); my %h; while ( my $ref = $query->hash ) { my $panel = $ref->{panel}; push @{ $h{$panel } }, $ref->{test}; } # p %h; # re-arrange in order of size of panel, largest 1st: my @data = map [ +($_ => $h{$_}) ], sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data; return \@data; }