RSS Git Download  Clone
Raw Blame History
#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
NGIS data extraction; require ngis-diagnosis.pl & ngis-test-directory.pl running 1st

maps hilis4 lab_test_id to NGIS test id using hilis4.ngis_lab_test table:
if lab_test_id entry in ngis_lab_test table:
* if single row then use ngis_test_code (diagnosis doesn't matter so request
    doesn't need to be reported)
* if lab_test has multiple ngis_test_codes, diagnosis.ngis_indiation_id is used
    to determine which (if any) ngis_test_code to use, where the diagnosis
    clinical indication must match one of the ngis test id's (eg HTS myeloid
    panel has matching codes for MDS, MPN, etc but not for CML)
* if NGIS code cannot be found on diagnosis, using loose matching on presentation,
    including previous screen terms used, eg 'MDS' matches Suspected MDS, Follow-up MDS
* last resort is to force NGIS code onto some test names (eg HTS myeloid)

complications (see geneq_data_extract.log):
* molecular panels are auto-expanded so excluding any by name doesn't work
* some hilis4 lab-tests/panels do not have ngis test id's
* some lab tests/panels do not have a clinical indication mapped to the diagnosis
* some diagnostic terms do not have any clinical indications (eg 'see comments',
    'lab tests only', etc)
* some requests not reported when data collected
eg: multiple NGIS ids for 'HTS myeloid' and 'See comments' has no clinical
    indication id
=cut
#-------------------------------------------------------------------------------

#==============================================================================
my $duration = 1; # default unless specified in command-line opts
#==============================================================================
my $testing = my $query_output = my $log_console = my $log_to_file = 0;
use Getopt::Long;
GetOptions (
    "months|m=i" => \$duration,       # int
    "testing|t"  => \$testing,        # flag
    "query|q"    => \$query_output,   # flag
    "console"    => \$log_console,    # flag
    "file"       => \$log_to_file,    # flag
); # warn $duration;

use strict;
use warnings;
use feature 'say';

my $JUST_TESTING = $testing || 0;

use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;

use IO::All;
use Data::Dumper;
use Data::Printer;
use List::Compare;
use Log::Any qw($log);
use Log::Any::Adapter;
use SQL::Abstract::More;
use List::Util qw(maxstr);
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);

use lib $Bin . '/../../lib';
use LIMS::Local::ScriptHelpers;

use lib '/home/raj/perl-lib';
use Local::DB;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

my $today = $tools->time_now;
my $dbix  = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names (now standardised)

$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)

my %STASH; # for logging/debugging

#-------------------------------------------------------------------------------
my $filename = $Bin.'/geneq_data.xls';
my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date;

my $logfile  = $Bin.'/geneq_data_extract.log'; # warn $logfile;
io($logfile)->print(); # reset

# for logging to console (can be reset later):
Log::Any::Adapter->set('Stdout') if $log_console;
# for logging to logfile (can be reset later):
Log::Any::Adapter->set(File => $logfile) if $log_to_file;

# check cytogenetics
my @lab_sections = (
    'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing',
    'Multiplex Ligation-dependent Probe Amplification'
);
# skip these lab-tests:
my %excluded_lab_tests = (
    FISH => [ qw(
        christie_myeloma_fusion_1
        christie_myeloma_fusion_2
        christie_myeloma_screen
        cell_selection_quality
        cytocell_cen11
        christie_cll
        h_and_e_fish
        pnh_myeloid
        cll_trial
        ebv_ish
        alpha18
        alpha17
        alpha12
        alpha11
        alpha8
    )],
    Molecular => [ qw(
        100kg_dna_extraction
        sanger_sequencing
        refer_material
        dna_control
        abl_control
    )],
);
# tests which are confirmatory and should not be included:
push @{ $excluded_lab_tests{FISH} }, qw(fgfr3);
push @{ $excluded_lab_tests{Molecular} }, qw(srsf2 asxl1);
# tests not currently on NGIS test directory (needs to be adaptable):
push @{ $excluded_lab_tests{FISH} }, qw(atm cytocell_atm);
# these don't work because molecular panels are auto-expanded to tests:
push @{ $excluded_lab_tests{Molecular} }, # panels:
    qw(burkitt_mutation cns_testicular_dlbcl); # p %excluded_lab_tests;

# may need to exclude D/RNA extractions:
my @xna_extractions = qw( dna_extraction rna_extraction cd138_dna );
# push @{ $excluded_lab_tests{Molecular} }, @xna_extractions;

my @excluded_parent_codes = (
	'RWM', 	  # Cardiff
	'RVA', 	  # Carmarthen
	'RT8', 	  # Conwy
	'RVF', 	  # Gwent
	'RT9', 	  # NE Wales
	'RT7', 	  # NW Wales
	'RVC', 	  # Swansea
	'RQF', 	  # Velidre, Wales
	'8EQ15',  # Drs Lab
	'MSDMDH', # Mater Dei
	'X99999', 'V81999' # unknowns
);

# look for loose match with presentation term:
my %presentation_types = (
    AML       => 'M80',
    CML       => 'M84',
    MDS       => 'M82',
    MPN       => 'M85',
    PNH       => 'M82',
    Hodgkin   => 'M95',
    'B-ALL'   => 'M91',
    myeloma   => 'M92', # case-sensitive
    Aplastic  => 'M82',
    Cytopenia => 'M82', # MDS
    # forcing lymphomas to clinical indication code for high-grade lymphoma (M99):
    # TODO: maybe better to force to prehistoric term B-NHL ??
    'Probable B-LPD'     => 'M99',
    'Suspected lymphoma' => 'M99',
    'Suspected B-cell lymphoma' => 'M99',
);

# force specific NGIS test codes (last resort):
my %forced_ngis_codes = (
    'BCR-ABL-tests' => 'M84.1', # CML
    'RUNX1-RUNX1T1' => 'M80.7', # AML
    'CBFbeta MYH11' => 'M80.7', # AML
    'HTS myeloid'   => 'M85.2', # Myeloproliferative neoplasm
    'CytoSNP'       => 'M82.2', # MDS
);

# molecular lab-tests done on RNA, rest done on DNA:
my $rna_tests = get_rna_tests(); # p $rna_tests;

# get all molecular panel lab tests:
my $molecular_panel_lab_test = get_panel_lab_tests('Molecular'); # p $molecular_panel_lab_test;
# get all FISH panel lab tests (will be modified to remove panels wiuth NGIS codes):
my $fish_panel_lab_test = get_panel_lab_tests('FISH'); # p $fish_panel_lab_test;

# get lab-tests auto-requested at registration (not used):
# my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests;

# get list of molecular tests (for converting tests back to panels):
my $all_molecular_tests = get_molecular_tests(); # p $molecular_tests;

# get mapped lab_test.id => NGIS test code:
my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests;
#-------------------------------------------------------------------------------

my ($sql, @bind) = _get_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];

my $query = $dbix->query( $sql, @bind );

# get cols from query, except 'private' ones used for evaluations (eg _datetime_screened):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');

my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);

runtimer('commencing data processing');

# get request_ids and DNA/RNA extraction dates into hash map:
my $extraction_dates = do {
    my @request_ids = map $_->{internal_request_id}, @data;
    get_extraction_dates_for_requests(\@request_ids);
}; # p $extraction_dates; exit;

runtimer('reconstituting molecular panels');
# pre-process data to reconstitute molecular panels:
@data = reconstitute_molecular_panels(\@data); # p @data;
# pre-process data to split (some) fish panels (too complex to combine with above):
@data = split_fish_panels(\@data); # p @data;
runtimer('finished reconstituting molecular panels');

# process @data:
for my $ref (@data) {
    my $data = process_data($ref); # returns arrayref
	$xl->write_row($data);
}
runtimer('completed data processing');

$xl->save($filename);

# p %STASH;
my $no_ngis_code = $STASH{no_ngis_code};
my $unreported   = $STASH{not_reported};
my $no_clinical_indication = $STASH{no_clinical_indication};
my $no_available_ngis_code = $STASH{no_available_ngis_code};

# log summary data to $logfile, override any previous adapter settings =========
Log::Any::Adapter->set(File => $logfile);

$log->info('DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION ' . '=' x 20);
$log->info($_) for @$no_clinical_indication;

$log->info('DIAGNOSIS WITH NO AVAILABLE NGIS CODE & PRESENTATION WITH NO '
    . ' CLINICAL INDICATION ' . '=' x 10);
$log->info($_) for @$no_available_ngis_code;

if ($unreported) {
    $log->info('UNREPORTED REQUESTS ' . '=' x 40);
    $log->info($_) for @$unreported;
}

$log->info('LAB TESTS WITH NO NGIS ID ' . '=' x 40);
$log->info(join ': ', $_, $no_ngis_code->{$_})
    for sort keys %{ $no_ngis_code };
#===============================================================================

sub runtimer { say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday] }

sub process_data {
    my $ref = shift; # href

    # NGIS test code:
    $ref->{test_id} = get_ngis_test_code($ref);

    # determine date lab-test requested (replaced by datetime_order_received):
    # $ref->{datetime_test_requested} = get_test_request_date($ref);

    # set datetime_testing_complete to test completion datetime:
    $ref->{datetime_testing_complete} = $ref->{_date_test_completed};

    { # set datetime_reporting_complete to later of testing_complete or authorisation:
        if ( $ref->{_date_authorised} ) {
            my @dates = @{$ref}{ qw/_datetime_authorised _date_test_completed/ };
            # dates in yyyy-mm-dd format can be handled as strings for determination
            # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
            $ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util
        }
        else { # not authorised, set to _date_test_completed:
            $ref->{datetime_reporting_complete} = $ref->{_date_test_completed}
        }
    }
=begin # datetime_processing_complete now set to D/RNA extraction date
    # set datetime_processing_complete to later of authorisation date or test
    # completion date; value will be null if request not yet authorised:
        # p $ref->{sample_processed_date}; p $ref->{_date_authorised};
    if ( $ref->{_date_authorised} ) {
        my @dates = @{$ref}{ qw/_date_authorised datetime_testing_complete/ };
        # dates in yyyy-mm-dd format can be handled as strings for determination
        # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
        $ref->{datetime_processing_complete} = maxstr(@dates); # List::Util
    } # else sample_completed_date remains null
=cut
    $ref->{datetime_processing_complete} = get_extraction_date($ref);

=begin # new template wants some cols temporarily NULL (commented out in query now):
    my @null_fields = qw/
        testing_laboratory  patient_first_name  patient_last_name  patient_dob
        nhs_number  patient_address  patient_postcode  referrer_last_name
        referring_facility_name  unit_number  referral_reason
        time_sample_collected  infection_risk  sample_units  sample_amount
        dna_concentration  sample_tube_id  sample_rack_id
    /;
    $ref->{$_} = undef for @null_fields;
=cut
    return [ @{$ref}{@headers} ];
}

sub _get_query {
    my @cols = (
        'CONCAT_WS("/",  r.request_number, r.year - 2000)|local_sample_id',
#        'NULL|local_sample_id_sub',
#        'NULL|local_dna_number',
#        'NULL|local_dna_number_sub',
        'p.id|internal_patient_id',
        'r.id|internal_request_id',
        q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|testing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!,
#        'NULL|ped_number',
#        'p.first_name|patient_first_name',
#        'p.last_name|patient_last_name',
#        'p.dob|patient_dob',
        'p.gender|patient_sex',
#        'p.nhs_number',
#        'pd.address|patient_address',
#        'NULL|patient_district_of_residence',
        'NULL|patient_city',
#        'pd.post_code|patient_postcode',
#        'NULL|patient_country',
        'ref.national_code|referrer_ods_code',
#        'NULL|referrer_email',
#        'NULL|referrer_telephone',
#        'NULL|referrer_title',
#        'NULL|referrer_first_name',
#        'ref.name|referrer_last_name',
        'hd.display_name|referrer_specialty',
#        'rs.display_name|referring_facility_name',
#        'NULL|referrer_address',
#        'NULL|referrer_city',
#        'NULL|referrer_district',
#        'NULL|referrer_postcode',
#        'NULL|referrer_county',
        'rs.organisation_code|referring_facility_ods_code',
#        'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b
#        'NULL|other_unit_number',
#        'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b
        q!"Routine"|test_priority!,
        'NULL|test_id', # NGIS test id (eg M80.6)
        'lt.field_label|test_name',
        'NULL|ngis_id', # NGIS order number ?
#        'NULL|datetime_test_requested', # replaced by datetime_order_received
#        'lt.test_type|request_comments', # panel or test
        q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ')
            AS sample_type_received!,
        'rsd.specimen_date|datetime_sample_collected',  # datetime
        'r.created_at|datetime_sample_received',        # datetime
        'r.created_at|datetime_order_received',         # datetime
        'NULL|datetime_processing_complete',            # set later
        'NULL|datetime_testing_complete',               # set later
        'NULL|datetime_reporting_complete',             # set later
        'rsd.specimen_quality|sample_condition',
#        q!MAX( CASE WHEN ao.option_name = 'doi'
#            THEN 'yes' ELSE 'no' END ) AS infection_risk!,
#        'NULL|sample_type_dispatched',
#        'IF( rst.volume, "ul", NULL )|sample_units',
#        'rst.volume|sample_amount',
#        q!IF( rst.concentration < 9999, rst.concentration, NULL )
#            AS dna_concentration!,
#        'NULL|sample_date_sent',
#        'rst.vialId|sample_tube_id',
#        'sr.plateId|sample_rack_id',
#        'NULL|sample_courier',
#        'NULL|sample_courier_ref',

#        'NULL|sample_processed_date',  # renamed datetime_processing_complete
#        'NULL|sample_completed_date',  # renamed datetime_testing_complete

    # fields used for data processing only (underscored for omission in @headers):
        'lt.id|_lab_test_id',
        'lt.test_type|_test_type',
        'lt.test_name|_test_name',
        'ls.section_name|_section_name',
        'd.name|_diagnosis',
        'nci.code|_clinical_indication_code',
        's2.description|_presentation',
        'rrv.diagnosis_id|_diagnosis_id',
		# q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END )
        #    AS _datetime_screened!, # not used
		# q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END )
        #    AS _datetime_reported!, # not used
		q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END )
            AS _datetime_authorised!,
        'ts.time|_datetime_test_completed', # date lab-test set to complete
    );
    my @rels = (                                'requests|r'                   ,
        q{r.patient_case_id=pc.id}           => 'patient_case|pc'              ,
        q{pc.patient_id=p.id}                => 'patients|p'                   ,
        q{pc.referral_source_id=rs.id}       => 'referral_sources|rs'          ,
        q{rs.parent_organisation_id=po.id}   => 'parent_organisations|po'      ,
        q{r.referrer_department_id=rd.id}    => 'referrer_department|rd'       ,
        q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd'      ,
        q{rd.referrer_id=ref.id}		     => 'referrers|ref'                ,
        q{rsd.request_id=r.id}               => 'request_specimen_detail|rsd'  ,
        q{rsp.request_id=r.id}               => 'request_specimen|rsp'         ,
        q{rsp.specimen_id=s.id}              => 'specimens|s'                  ,
        q{ts.request_id=r.id}                => 'request_lab_test_status|ts'   ,
        q{ts.status_option_id=so.id}         => 'lab_test_status_options|so'   ,
        q{ts.lab_test_id=lt.id}              => 'lab_tests|lt'                 ,
        q{lt.lab_section_id=ls.id}           => 'lab_sections|ls'              ,
        q{rsv.request_id=r.id}               => 'request_status_view|rsv'      ,
        q{ris.request_id=r.id}               => 'request_initial_screen|ris'   ,
        q{ris.screen_id=s2.id}               => 'screens|s2'                   ,
      # left joins:
        q{=>rrv.request_id=r.id}             => 'request_report_view|rrv'      ,
        q{=>rrv.diagnosis_id=d.id}           => 'diagnoses|d'                  ,
        q{=>d.ngis_indication_id=nci.id}     => 'ngis_clinical_indications|nci',
        q{=>ro.request_id=r.id}              => 'request_option|ro'            ,
        q{=>ro.option_id=ao.id}              => 'additional_options|ao'        ,
        q{=>pd.patient_id=p.id}              => 'patient_demographics|pd'      ,
        q{=>rst.request_id=r.id}             => 'request_storage|rst'          ,
        q{=>rst.rack_id=sr.id}               => 'storage_racks|sr'             ,
        q{=>rt.request_id=r.id}              => 'request_trial|rt'             ,
    );
    my $excluded_lab_test_ids = get_excluded_lab_test_ids();
    my %where = (
        -and => [ # repeated elements (parent_code), requires arrayref
            'rt.request_id'        => undef,      # not in clinical trial
            'so.description'       => 'complete', # lab-test status
            'MONTH(ts.time)'       => $ref_date->month,  # when test status = complete
            'YEAR(ts.time)'        => $ref_date->year,   # when test status = complete
            'ls.section_name'      => { -in => \@lab_sections },
            'rs.organisation_code' => { -not_rlike => '^NT' },    # private hospital
            'po.parent_code'       => { -not_rlike => '^(S|Z)' }, # scotland/ireland
            'po.parent_code'       => { -not_in => \@excluded_parent_codes },
            'lt.id'                => { -not_in => $excluded_lab_test_ids },
            'lt.test_name'   => { -not_rlike => 'quantification|store_[dr]na' },
        ],
    );
    my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;

    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
        # -where    => { 'r.id' => [280442,] },
        -group_by => [ 'lt.id', 'r.id' ],
        -having   => { $is_private => undef }, # not private patient
        -order_by => 'r.id',
        #-limit => 100,
        #-offset => 100,
    ); # p @args;
    # need to supply own injection_guard to override built-in sql injection attack
    # prevention - detects ';' in group_concat; modified from SQL::Abstract new()
    my @args_to_new = ( injection_guard => qr/^ \s* go \s/mi );
    my ($sql, @bind) = SQL::Abstract::More->new(@args_to_new)->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return ($sql, @bind);
}

sub get_excluded_lab_test_ids {
    my @test_ids;

    my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
    my %args = (
        -columns  => 'lt.id',
        -from     => [ -join => @rels ],
    );
    while ( my($section, $ref) = each %excluded_lab_tests ) {
        $args{'-where'} = {
            'ls.section_name' => $section,
            'lt.test_name'    => { -in => $ref },
        };
        my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
            $dbix->dump_query($sql, @bind) if $query_output; # exit;
        my @ids = $dbix->query($sql, @bind)->column; # ref
        push @test_ids, @ids;
    } # p @test_ids;
    return wantarray ? @test_ids : \@test_ids;
}

sub get_extraction_dates_for_requests {
    my $request_ids = shift;

    my @cols = (
        'ts.request_id',
        q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/,
        'ts.time',
    );
    my @rels = (                       'request_lab_test_status|ts',
        'ts.lab_test_id=lt.id'      => 'lab_tests|lt'              ,
        'ts.status_option_id=so.id' => 'lab_test_status_options|so',
    );
    my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna );
    my %where = (
        'so.description' => 'complete',
        'lt.test_name'   => { -in => \@extraction_tests },
        'ts.request_id'  => { -in => $request_ids },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $query = $dbix->query( $sql, @bind );

    my %h;
    while ( my $ref = $query->hash ) {
        my $request_id = $ref->{request_id};
        my $datetime   = $ref->{time};
        my $type       = $ref->{type};

        $h{$request_id}{$type} = $datetime;
    }
    return \%h;
}

sub get_extraction_date {
    my $ref = shift;
    # FISH tests not done on D/RNA extraction so can't get a processing date:
    return if $ref->{_section_name} eq 'FISH';

    my $test_name = $ref->{_test_name};
    my $lab_num   = $ref->{local_sample_id};
    my $req_id    = $ref->{internal_request_id};

    # get extraction dates for DNA and/or RNA:
    my $extraction = $extraction_dates->{$req_id};
    say "no extraction for $lab_num $test_name" and return
        unless $extraction;

    # get expected material, RNA if test_name in %rna_tests, otherwise assume
    # DNA, except a small minority of tests done on CD138-selected DNA (see below):
    my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA';
    # get extraction date for test material:
    my $date = $extraction->{$material};
    # some tests (eg MLPA myeloma) are done on CD138-selected DNA -
    # key = CD138DNA in %extraction_dates:
    $date ||= $extraction->{CD138DNA}; # or give up - no extraction date available:
    say "no extraction date for $lab_num $test_name $material" and return
        unless $date;
    return $date;
}

=begin # not used - replaced by datetime_order_received
sub get_test_request_date {
    my $ref = shift; # p $ref;

    my $request_id = $ref->{internal_request_id};
    my $registered = $ref->{datetime_sample_received};
    my $test_name  = $ref->{test_name};
    my $screened   = $ref->{_datetime_screened};

    # use manual request date if exists, or auto-request date, or date of screening:
    my $manual_request_date = get_manual_test_request($request_id, $test_name);
    # auto-requested by sample-type, therefore date = registration:
    my $auto_request_date = $registration_lab_tests->{$test_name}
        ? $registered : 0; # zero OK as will be tested for truth below
        # p [$manual_request_date, $auto_request_date, $screened];

    return $manual_request_date || $auto_request_date || $screened;
}
=cut

=begin # not used:
sub get_registration_lab_test_requests {
    my @cols = ( 'DISTINCT t2.field_label', 1 );
    my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2');
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query($sql, @bind)->map;
}
=cut

sub get_rna_tests {
    my @rels = (                       'linked_lab_test|llt' ,
        'llt.parent_test_id=lt1.id' => 'lab_tests|lt1'       ,
        'llt.linked_test_id=lt2.id' => 'lab_tests|lt2'       ,
    );
    my @args = (
		-columns  => [ 'lt1.test_name', 1 ],
		-from     => [ -join => @rels ],
		-where    => { 'lt2.test_name' => 'rna_extraction' },
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $map = $dbix->query( $sql, @bind )->map;
    # add rna_extraction in case extraction tests are required in data return:
    $map->{rna_extraction} = 1;
    return $map;
}

=begin # called from get_test_request_date(), not used
sub get_manual_test_request {
    my ($request_id, $test_name) = @_;

    my @args = (
		-columns  => [ 'time' ],
		-from     => [ 'request_lab_test_history' ],
        -where    => {
            request_id => $request_id,
            action => {
                -rlike => '^(auto-)?requested( linked test)? ' . $test_name
            },
        },
        -order_by => [ '-time' ], # ORDER BY time DESC
        -limit    => 1,           # returns most recent only if >1
    ); # p @args;

    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query($sql, @bind)->value;
}
=cut

sub get_ngis_test_code {
    my $ref = shift; p $ref if $ref->{test_name} =~ /Burkitt mutation/;

    my $presentation = $ref->{_presentation};
    my $diagnosis_id = $ref->{_diagnosis_id};
    my $lab_test_id  = $ref->{_lab_test_id};
    my $lab_section  = $ref->{_section_name};
    my $test_name    = $ref->{test_name};
    my $test_type    = $ref->{_test_type};
    my $diagnosis    = $ref->{_diagnosis} || '';
    my $lab_num      = $ref->{local_sample_id};

  # get NGIS code(s) for this lab-test:
    my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref
    unless ($ngis_test_code) {
        my $title = "$test_name $test_type [$lab_section]";
        $STASH{no_ngis_code}{$title}++;
        # say "$lab_num: no NGIS id for $test_name";
        return undef;
    }
  # return NGIS test code if it's the only one (even if not supported clinical
  # indication; TODO: this might not be suitable, might need to consider diagnosis):
    return $ngis_test_code->[0] if scalar @$ngis_test_code == 1;

  # now have multiple NGIS id's for lab-test
    # common message for debugging:
    my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!;
    my $str = "$lab_num: $test_name [diagnosis: $diagnosis;"
        . " presentation: $presentation]";

    # vars for try_presentation():
    my %vars_for_presentation = ( data => $ref, ngis_code => $ngis_test_code );

   # now need to examine diagnosis:
    if (! $diagnosis_id) {
        if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
            # say "found $ngis_code from $presentation";
            return $ngis_code;
        }
        # io($log)->appendln("$msg and request is not reported");
        push @{ $STASH{not_reported} }, $str and return undef;
    }
  # get clinical indication for diagnosis (eg AML NOS = M80)
    my $indication = $ref->{_clinical_indication_code}; # p $indication;
    if (! $indication) { # say 'no clinical indication';
        if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
            # say "found $ngis_code from $presentation";
            return $ngis_code;
        }
        # $STASH{no_clinical_indication}{$diagnosis}++;
        #io($log)->appendln("$msg and '$diagnosis' has no clinical indication id"
        #    . " [presentation: $presentation]");
        push @{ $STASH{no_clinical_indication} }, $str and return undef;
    } # p $indication; p $ngis_ids;

  # return NGIS code from list if has a matching clinical indication:
    for my $ngis_code (@$ngis_test_code) {
        # say "$lab_num: $indication -> $ngis_code" if $ngis_code =~ /^$indication/;
        return $ngis_code if $ngis_code =~ /^$indication/;
    }
  # last resort is to try to find NGIS code from presentation:
    if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
        # say "found $ngis_code from $presentation";
        return $ngis_code;
    }

  # can't find an NGIS code for lab-test from presentation or diagnosis:
    #io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] "
    #    . "did not match any available NGIS test codes [presentation: $presentation]");
    # say "no NGIS id available for $test_name";
    push @{ $STASH{no_available_ngis_code} },
        "$lab_num: $test_name [diagnosis indication $indication [$diagnosis]; "
            . "presentation: $presentation]";
    return undef;
}

sub try_presentation { # try to look for a loose match with presentation term:
    my $args = shift;

    my $presentation = $args->{data}->{_presentation};
    my $request_id   = $args->{data}->{internal_request_id};
    my $ngis_codes   = $args->{ngis_code};
    my $test_name    = $args->{data}->{test_name};
       # p $presentation; p $ngis_codes; p $test_name;

    # get any previous screening terms from history file:
    my @all_screens = get_request_history($request_id);
    push @all_screens, $presentation; # p @all_screens;

    for my $screen (@all_screens) {
        for my $type ( keys %presentation_types ) { # p $type; # eg AML, MDS
            if ( $screen =~ /$type/ ) { # eg Suspected MDS =~ /MDS/
                # say "$screen matches $type";
                for my $code (@$ngis_codes) { # eg M84.2
                        # p $code; p $presentation_types{$type};
                    if ( $code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/
                        # say "$code matches $presentation_types{$type}";
                        return $code;
                    }
                }
            }
        }
    }
    { # if we get this far, no other match possible, force NGIS code if test
        # configured in %forced_ngis_codes:
        for my $test ( keys %forced_ngis_codes ) {
            return $forced_ngis_codes{$test} if $test_name eq $test;
        }
        # BCR-ABL p210 RQ or BCR-ABL multiplex:
        return $forced_ngis_codes{'BCR-ABL-tests'} if $test_name =~ /BCR-ABL/;
    }
    return undef;
}

sub get_request_history {
    my $request_id = shift; # p $request_id;

    my $query = $dbix->select('request_history', 'action',
        {
            request_id => $request_id,
            action     => { -like => 'updated initial screen entry %' }
        }
    );
    my @screens;
    while ( my $action = $query->value ) { # p $action;
        if ( $action =~ /updated initial screen entry (.*)/ ) {
            push @screens, $1;
        }
    }
    return wantarray ? @screens : \@screens;
}

sub get_ngis_lab_tests {
    my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]);
    my %h;
    while ( my $ref = $q->array ) { # p $ref;
        my $lab_test_id = $ref->[0];
        push @{ $h{$lab_test_id} }, $ref->[1];
    }
    return \%h;
}

sub split_fish_panels { # only some FISH panels (those without NGIS map):
    my $data = shift; # p $data;

    my @non_ngis_fish_panels;
    my $required_fish_panels = get_fish_panel_names_without_ngis_codes(); # p $required_fish_panels;
    # remove panels from $fish_panel_lab_test with NGIS codes:
    for my $ref (@$fish_panel_lab_test) { # AoA
        my $panel_name = $ref->[0]; # p $panel_name;
        next unless grep { $panel_name eq $_ } @$required_fish_panels;
        # say "$panel_name in required_fish_panels";
        push @non_ngis_fish_panels, $ref;
    } # p @non_ngis_fish_panels;

    # new data file:
    my @new;

    return wantarray ? @new : \@new;
}

sub get_fish_panel_names_without_ngis_codes {
    my @rels = (                       'lab_tests|lt',
        'lt.lab_section_id=ls.id'   => 'lab_sections|ls',
        '=>nlt.lab_test_id=lt.id'   => 'ngis_lab_test|nlt'
    );
    my %where = (
        'ls.section_name' => 'FISH',
	    'lt.test_type'    => 'panel',
        'lt.test_name'    => { -not_in => $excluded_lab_tests{FISH} },
        'nlt.lab_test_id' => undef,
    );
    my @args = (
		-columns  => 'lt.field_label',
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query( $sql, @bind )->column;
}

sub reconstitute_molecular_panels {
    my $data = shift; # p $data;

    # new data file:
    my @new;

    # keep track of request_id:
    my $last_request_id = 0;
    # holder for molecular tests data:
    my @molecular_tests = (); # will be destroyed and recreated after each request

    ROW:
    for my $row (@$data) {
        my $lab_section = $row->{_section_name};
        my $request_id  = $row->{internal_request_id};
        # if new request, handle molecular tests data, if any:
        if ( @molecular_tests && $request_id != $last_request_id ) {
            $log->info("molecular tests for $last_request_id:",
                { set => [ map $_->{test_name}, @molecular_tests ] } );

            my @rows = molecular_tests_to_panels(\@molecular_tests); # p @rows;
            push @new, @rows;
            @molecular_tests = (); # reset for next request
        }
        push @new, $row and next ROW if $lab_section ne 'Molecular';
        # have molecular data row:
        push @molecular_tests, $row;
        # set last_request_id to current request_id:
        $last_request_id = $request_id;
    } # p %tests;

    return wantarray ? @new : \@new;
}

sub molecular_tests_to_panels {
    my $ref = shift; # AoH's

    my @all_molecular_tests = map $_->{test_name}, @$ref;
        # say 'test_names:'; p @all_molecular_tests;

    # panels ordered in size of array, largest 1st to prevent matches with
    # smaller panels having common members (eg AML 60+ & AML under-60):
    PANEL:
    for my $t (@$molecular_panel_lab_test) { # eg AML 60+ => [NPM1, FLT3 ITD]
        my ($panel_name, $panel_tests) = @$t; # p @$panel_tests;
        # don't need ordered list, use -u flag for speed boost:
        my $lc = List::Compare->new('-u', $panel_tests, \@all_molecular_tests);
        # if this panel list is a subset of all_molecular_tests:
        if ( $lc->is_LsubsetR ) { # is left-a-subset-of-right ?
            $log->info("$panel_name panel found in molecular set:",
                { panel_tests => $panel_tests } );

            my @set = @$panel_tests; # localise for element substitutions
            my %seen = (); # reset test name stash for this panel

            # remove 1st element in @set into a variable:
            my $first_element_of_set = shift @set;
                # say "first_element_of_set: $first_element_of_set";

            # for test name of 1st entry in $ref matching 1st element of @set,
            # change it to the matched panel_name & flag the rest for deletion:
            DATA:
            for my $d (@$ref) { # p $d; # data href
                next DATA if $d->{DELETE_ME}; # already flagged for deletion
                my $test_name = $d->{test_name}; # p $test_name;
                    # say "testing whether $test_name eq $first_element_of_set";
                if ( $test_name eq $first_element_of_set ) {
                    $log->info("changing $test_name to $panel_name");
                     say "changing $test_name to $panel_name";
                    $d->{test_name} = $panel_name;
                    # also need to change lab_test_id for finding NGIS test code:
                    $d->{_lab_test_id} = $all_molecular_tests->{$panel_name};
                    $d->{_test_type}   = 'panel'; # for log in case it's unmapped
                }
                elsif ( grep $test_name eq $_, @set ) {
                    # set a flag for later (only once per test in case it's
                    # re-requested or is part of another panel):
                    next DATA if $seen{$test_name}++;
                    $log->info("$test_name is in $panel_name set, flagging for deletion");
                    # say "$test_name is in $panel_name set, flagging for deletion";
                    $d->{DELETE_ME}++;
                }
            }
        }
    } # p $ref;
    $log->info('=' x 30);
    # return original hashrefs not flagged for deletion:
    my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data;
    return wantarray ? @data : \@data;
}

sub get_molecular_tests {
    my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
    my @args = (
		-columns  => [ qw/lt.field_label lt.id/ ],
		-from     => [ -join => @rels ],
		-where    => { 'ls.section_name' => 'Molecular' },
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query( $sql, @bind )->map;
}

sub get_panel_lab_tests {
    my $section = shift;

    my @cols = (
    	'lt1.field_label|panel',
        'lt2.field_label|test',
    );
    my @rels = (                         'panel_lab_test|plt'  ,
        'plt.panel_test_id=lt1.id'    => 'lab_tests|lt1'       ,
        'plt.lab_test_id=lt2.id'      => 'lab_tests|lt2'       ,
        'lt1.lab_section_id=ls.id'    => 'lab_sections|ls'     ,
    );
    # need to *locally* exclude DNA extraction, if not already excluded:
    my @local_excluded_tests = @{ $excluded_lab_tests{Molecular} };
    push @local_excluded_tests, 'dna_extraction'
        unless grep { $_ eq 'dna_extraction' } @local_excluded_tests;

    my %where = (
        'ls.section_name' => $section,
        'lt1.is_active'   => 'yes',
        'lt2.test_name'   => { -not_in => \@local_excluded_tests },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $query = $dbix->query( $sql, @bind );

    my %h;
    while ( my $ref = $query->hash ) {
        my $panel = $ref->{panel};
        push @{ $h{$panel } }, $ref->{test};
    } # p %h;
    # re-arrange in order of size of panel, largest 1st:
    my @data = map [ +($_ => $h{$_}) ],
        sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data;
    return \@data;
}