#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
NGIS data extraction; require ngis-diagnosis.pl & ngis-test-directory.pl running 1st

maps hilis4 lab_test_id to NGIS test id using hilis4.ngis_lab_test table:
if lab_test_id entry in ngis_lab_test table:
* if single row then use ngis_test_code (diagnosis doesn't matter so request
    doesn't need to be reported)
* if lab_test has multiple ngis_test_codes, diagnosis.ngis_indiation_id is used
    to determine which (if any) ngis_test_code to use, where the diagnosis
    clinical indication must match one of the ngis test id's (eg HTS myeloid
    panel has matching codes for MDS, MPN, etc but not for CML)
* if NGIS code cannot be found on diagnosis, using loose matching on presentation
    eg 'MDS' matches Suspected MDS, Follow-up MDS
* last resort is to force NGIS code onto some test names (eg HTS myeloid)

complications (see geneq_data_extract.log):
* many hilis4 lab-tests do not yet have ngis test id's
* many lab tests do not have a clinical indication mapped to the diagnosis
* many diagnostic terms do not have any clinical indications (eg 'see comments',
    'lab tests only', etc)
* some requests not reported when data collected
eg: multiple NGIS ids for 'HTS myeloid' and 'See comments' has no clinical
    indication id
=cut
#-------------------------------------------------------------------------------

use Getopt::Std;
getopts('m:tq'); # months, testing, query output
our($opt_m,$opt_t,$opt_q); # warn $opt_t; exit;

use strict;
use warnings;
use feature 'say';

my $JUST_TESTING = $opt_t || 0; # dumps xls file in /tmp only

#==============================================================================
my $duration = $opt_m || 1; # months;
#==============================================================================

use lib '/home/raj/perl5/lib/perl5';
use IO::All;
use Data::Printer;
use Data::Dumper;
use SQL::Abstract::More;
use List::Util qw(maxstr);
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);

use List::MoreUtils qw(all any);
# use Array::Utils qw(:all);

use FindBin qw($Bin); # warn $Bin;
use lib $Bin . '/../../lib';
use LIMS::Local::ScriptHelpers;

use lib '/home/raj/perl-lib';
use Local::DB;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

my $today = $tools->time_now;
my $dbix  = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names (now standardised)

$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)

my $log = $Bin . '/geneq_data_extract.log';
io($log)->print; # reset

my %STASH; # for logging/debugging

#-------------------------------------------------------------------------------
my $filename = $Bin.'/geneq_data.xls';
my $ref_date = $today->clone->subtract( months => 1 ); # warn $ref_date;

# check cytogenetics
my @lab_sections = (
    'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing',
    'Multiplex Ligation-dependent Probe Amplification'
);
my @excluded_lab_tests = qw(
    cell_selection_quality
    100kg_dna_extraction
    sanger_sequencing
    refer_material
    h_and_e_fish
    fish_h_and_e
    dna_control
    abl_control
);
# may need to exclude D/RNA extractions:
# push @excluded_lab_tests, qw( dna_extraction rna_extraction cd138_dna );

my @excluded_parent_codes = (
	'RWM', 	  # Cardiff
	'RVA', 	  # Carmarthen
	'RT8', 	  # Conwy
	'RVF', 	  # Gwent
	'RT9', 	  # NE Wales
	'RT7', 	  # NW Wales
	'RVC', 	  # Swansea
	'RQF', 	  # Velidre, Wales
	'8EQ15',  # Drs Lab
	'MSDMDH', # Mater Dei
	'X99999', 'V81999' # unknowns
);
# look for loose match with presentation term:
my %presentation_types = (
    AML       => 'M80',
    CML       => 'M84',
    MDS       => 'M82',
    MPN       => 'M85',
    PNH       => 'M82',
    Hodgkin   => 'M95',
    'B-ALL'   => 'M91',
    myeloma   => 'M92', # case-sensitive
    Aplastic  => 'M82',
    Cytopenia => 'M82', # MDS ? check this
    'HTS myeloid' => 'M82',  # MDS
    #'B-LPD'   => 'M93',            # none configured
    #'Suspected lymphoma' => 'M93', # none configured
);

# lab-tests done on RNA, rest done on DNA:
=begin # using linked_lab_tests table now
my %rna_tests = map +($_ => 1), qw(
    abl1_kinase_p190
    abl1_kinase_p210
    abl1_kinase_p230
    bcr_abl_multiplex
    bcr_abl_p190
    bcr_abl_p210
    cbf_beta-myh11
    ckit
    rq_cml
    cml_rq_b2a3_b3a3
    etv6_runx1
    pml_rara_bcl1
    pml_rara_bcl3
    runx1_runx1t1
); # p %rna_tests;
=cut
my $rna_tests = get_rna_tests(); # p $rna_tests;

# get panel_lab_tests:
my $panel_lab_test = get_panel_lab_tests(); # p $panel_lab_test;

my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests;

my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests;
#-------------------------------------------------------------------------------

my ($sql, @bind) = _get_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];

my $query = $dbix->query( $sql, @bind );

# get cols from query, except 'private' ones used for evaluations (eg _datetime_screened):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');

my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);

runtimer('commencing data processing');

#-------------------------------------------------------------------------------
# get request_ids and DNA/RNA extraction dates into hash map:
my @request_ids = map $_->{internal_request_id}, @data;

=begin # done using db lookup now
for my $ref(@data) {
    if ( $ref->{test_name} =~ /^([DR]NA) extraction$/ ) {
        my $xna_type = $1;
        my $datetime = $ref->{_date_test_completed};
        my $req_id   = $ref->{internal_request_id};
        $extraction_dates{$req_id}{$xna_type} = $datetime;
        # remove row:
        $ref = undef;
    }
} # p %extraction_dates; # p @data; exit;
=cut
my $extraction_dates = get_extraction_dates_for_requests(\@request_ids);
# p $extraction_dates; exit;
#-------------------------------------------------------------------------------

# pre-process data to reconstitute molecular panels:
@data = reconstitute_molecular_panels(\@data); # p @data;

# process @data:
for my $ref (@data) {
    my $data = process_data($ref); # returns arrayref
	$xl->write_row($data);
}
runtimer('completed data processing');

$xl->save($filename);

# p %STASH;
my $no_ngis_code = $STASH{no_ngis_code};
my $unreported   = $STASH{not_reported};
my $no_clinical_indication = $STASH{no_clinical_indication};
my $no_available_ngis_code = $STASH{no_available_ngis_code};

io($log)->appendln('=' x 10
    . ' DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION ' . '=' x 10);
io($log)->appendln($_) for @$no_clinical_indication;

io($log)->appendln('=' x 10  . ' PRESENTATION WITH NO CLINICAL INDICATION '
    . '& DIAGNOSIS WITH NO AVAILABLE NGIS CODE ' . '=' x 10);
io($log)->appendln($_) for @$no_available_ngis_code;

if ($unreported) {
    io($log)->appendln('=' x 40 . 'UNREPORTED REQUESTS' . '=' x 40);
    io($log)->appendln($_) for @$unreported;
}

io($log)->appendln('=' x 40 . 'LAB TESTS WITH NO NGIS ID' . '=' x 40);
io($log)->appendln(join ': ', $_, $no_ngis_code->{$_})
    for sort keys %{ $no_ngis_code };

#===============================================================================
sub runtimer { say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday] }

sub process_data {
    my $ref = shift; # href

    # NGIS test code:
    $ref->{test_id} = get_ngis_test_code($ref);

    # determine date lab-test requested:
    $ref->{datetime_test_requested} = get_test_request_date($ref);

    # set datetime_testing_complete to test completion datetime:
    $ref->{datetime_testing_complete} = $ref->{_date_test_completed};

    { # set datetime_reporting_complete to later of testing_complete or authorisation:
        if ( $ref->{_date_authorised} ) {
            my @dates = @{$ref}{ qw/_datetime_authorised _date_test_completed/ };
            # dates in yyyy-mm-dd format can be handled as strings for determination
            # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
            $ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util
        }
        else { # not authorised, set to _date_test_completed:
            $ref->{datetime_reporting_complete} = $ref->{_date_test_completed}
        }
    }
=begin # datetime_processing_complete now set to D/RNA extraction date
    # set datetime_processing_complete to later of authorisation date or test
    # completion date; value will be null if request not yet authorised:
        # p $ref->{sample_processed_date}; p $ref->{_date_authorised};
    if ( $ref->{_date_authorised} ) {
        my @dates = @{$ref}{ qw/_date_authorised datetime_testing_complete/ };
        # dates in yyyy-mm-dd format can be handled as strings for determination
        # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
        $ref->{datetime_processing_complete} = maxstr(@dates); # List::Util
    } # else sample_completed_date remains null
=cut
    $ref->{datetime_processing_complete} = get_extraction_date($ref);

=begin # new template wants some cols temporarily NULL (commented out in query now):
    my @null_fields = qw/
        testing_laboratory  patient_first_name  patient_last_name  patient_dob
        nhs_number  patient_address  patient_postcode  referrer_last_name
        referring_facility_name  unit_number  referral_reason
        time_sample_collected  infection_risk  sample_units  sample_amount
        dna_concentration  sample_tube_id  sample_rack_id
    /;
    $ref->{$_} = undef for @null_fields;
=cut
    return [ @{$ref}{@headers} ];
}

sub _get_query {
    my @cols = (
        'CONCAT_WS("/",  r.request_number, r.year - 2000)|local_sample_id',
#        'NULL|local_sample_id_sub',
#        'NULL|local_dna_number',
#        'NULL|local_dna_number_sub',
        'p.id|internal_patient_id',
        'r.id|internal_request_id',
        q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|testing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!,
#        'NULL|ped_number',
#        'p.first_name|patient_first_name',
#        'p.last_name|patient_last_name',
#        'p.dob|patient_dob',
        'p.gender|patient_sex',
#        'p.nhs_number',
#        'pd.address|patient_address',
#        'NULL|patient_district_of_residence',
        'NULL|patient_city',
#        'pd.post_code|patient_postcode',
#        'NULL|patient_country',
        'ref.national_code|referrer_ods_code',
#        'NULL|referrer_email',
#        'NULL|referrer_telephone',
#        'NULL|referrer_title',
#        'NULL|referrer_first_name',
#        'ref.name|referrer_last_name',
        'hd.display_name|referrer_specialty',
#        'rs.display_name|referring_facility_name',
#        'NULL|referrer_address',
#        'NULL|referrer_city',
#        'NULL|referrer_district',
#        'NULL|referrer_postcode',
#        'NULL|referrer_county',
        'rs.organisation_code|referring_facility_ods_code',
#        'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b
#        'NULL|other_unit_number',
#        'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b
        q!"Routine"|test_priority!,
        'NULL|test_id', # NGIS test id (eg M80.6)
        'lt.field_label|test_name',
        'NULL|ngis_id', # NGIS order number ?
        'NULL|datetime_test_requested', # date lab-test requested - calculated later
#        'lt.test_type|request_comments', # panel or test
        q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ')
            AS sample_type_received!,
        'rsd.specimen_date|datetime_sample_collected',  # datetime
        'r.created_at|datetime_sample_received',        # datetime
        'r.created_at|datetime_order_received',         # datetime
        'NULL|datetime_processing_complete',            # set later
        'NULL|datetime_testing_complete',               # set later
        'NULL|datetime_reporting_complete',             # set later
        'rsd.specimen_quality|sample_condition',
#        q!MAX( CASE WHEN ao.option_name = 'doi'
#            THEN 'yes' ELSE 'no' END ) AS infection_risk!,
#        'NULL|sample_type_dispatched',
#        'IF( rst.volume, "ul", NULL )|sample_units',
#        'rst.volume|sample_amount',
#        q!IF( rst.concentration < 9999, rst.concentration, NULL )
#            AS dna_concentration!,
#        'NULL|sample_date_sent',
#        'rst.vialId|sample_tube_id',
#        'sr.plateId|sample_rack_id',
#        'NULL|sample_courier',
#        'NULL|sample_courier_ref',

#        'NULL|sample_processed_date',  # renamed datetime_processing_complete
#        'NULL|sample_completed_date',  # renamed datetime_testing_complete

    # fields used for data processing only (underscored for omission in @headers):
        'lt.id|_lab_test_id',
        'lt.test_name|_test_name',
        'ls.section_name|_section_name',
        'd.name|_diagnosis',
        'nci.code|_clinical_indication_code',
        's2.description|_presentation',
        'rrv.diagnosis_id|_diagnosis_id',
		q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END )
            AS _datetime_screened!,
		# q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END )
        #    AS _datetime_reported!, # not used
		q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END )
            AS _datetime_authorised!,
        'ts.time|_datetime_test_completed', # date lab-test set to complete
    );
    my @rels = (                                'requests|r'                   ,
        q{r.patient_case_id=pc.id}           => 'patient_case|pc'              ,
        q{pc.patient_id=p.id}                => 'patients|p'                   ,
        q{pc.referral_source_id=rs.id}       => 'referral_sources|rs'          ,
        q{rs.parent_organisation_id=po.id}   => 'parent_organisations|po'      ,
        q{r.referrer_department_id=rd.id}    => 'referrer_department|rd'       ,
        q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd'      ,
        q{rd.referrer_id=ref.id}		     => 'referrers|ref'                ,
        q{rsd.request_id=r.id}               => 'request_specimen_detail|rsd'  ,
        q{rsp.request_id=r.id}               => 'request_specimen|rsp'         ,
        q{rsp.specimen_id=s.id}              => 'specimens|s'                  ,
        q{ts.request_id=r.id}                => 'request_lab_test_status|ts'   ,
        q{ts.status_option_id=so.id}         => 'lab_test_status_options|so'   ,
        q{ts.lab_test_id=lt.id}              => 'lab_tests|lt'                 ,
        q{lt.lab_section_id=ls.id}           => 'lab_sections|ls'              ,
        q{rsv.request_id=r.id}               => 'request_status_view|rsv'      ,
        q{ris.request_id=r.id}               => 'request_initial_screen|ris'   ,
        q{ris.screen_id=s2.id}               => 'screens|s2'                   ,
      # left joins:
        q{=>rrv.request_id=r.id}             => 'request_report_view|rrv'      ,
        q{=>rrv.diagnosis_id=d.id}           => 'diagnoses|d'                  ,
        q{=>d.ngis_indication_id=nci.id}     => 'ngis_clinical_indications|nci',
        q{=>ro.request_id=r.id}              => 'request_option|ro'            ,
        q{=>ro.option_id=ao.id}              => 'additional_options|ao'        ,
        q{=>pd.patient_id=p.id}              => 'patient_demographics|pd'      ,
        q{=>rst.request_id=r.id}             => 'request_storage|rst'          ,
        q{=>rst.rack_id=sr.id}               => 'storage_racks|sr'             ,
        q{=>rt.request_id=r.id}              => 'request_trial|rt'             ,
    );
    my %where = (
        -and => [ # repeated elements (test_name, parent_code), requires arayref
            'rt.request_id'        => undef,      # not in clinical trial
            'so.description'       => 'complete', # lab-test status
            'MONTH(ts.time)'       => $ref_date->month,  # when test status = complete
            'YEAR(ts.time)'        => $ref_date->year,   # when test status = complete
            'ls.section_name'      => { -in => \@lab_sections },
            'rs.organisation_code' => { -not_rlike => '^NT' },    # private hospital
            'po.parent_code'       => { -not_rlike => '^(S|Z)' }, # scotland/ireland
            'po.parent_code'       => { -not_in => \@excluded_parent_codes },
            'lt.test_name'         => { -not_in => \@excluded_lab_tests },
            'lt.test_name'   => { -not_rlike => 'quantification|store_[dr]na' },
        ],
    );
    my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;

    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
        # -where    => { 'r.id' => [280442,] },
        -group_by => [ 'lt.id', 'r.id' ],
        -having   => { $is_private => undef }, # not private patient
        -order_by => 'r.id',
        #-limit => 100,
        #-offset => 100,
    ); # p @args;
    # need to supply own injection_guard to override built-in sql injection attack
    # prevention - detects ';' in group_concat; modified from SQL::Abstract new()
    my @args_to_new = ( injection_guard => qr/^ \s* go \s/mi );
    my ($sql, @bind) = SQL::Abstract::More->new(@args_to_new)->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $opt_q; # exit;
    return ($sql, @bind);
}

sub get_extraction_dates_for_requests {
    my $request_ids = shift;

    my @cols = (
        'ts.request_id',
        q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/,
        'ts.time',
    );
    my @rels = (                       'request_lab_test_status|ts',
        'ts.lab_test_id=lt.id'      => 'lab_tests|lt'              ,
        'ts.status_option_id=so.id' => 'lab_test_status_options|so',
    );
    my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna );
    my %where = (
        'so.description' => 'complete',
        'lt.test_name'   => { -in => \@extraction_tests },
        'ts.request_id'  => { -in => $request_ids },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $opt_q; # exit;
    my $query = $dbix->query( $sql, @bind );

    my %h;
    while ( my $ref = $query->hash ) {
        my $request_id = $ref->{request_id};
        my $datetime   = $ref->{time};
        my $type       = $ref->{type};

        $h{$request_id}{$type} = $datetime;
    }
    return \%h;
}

sub get_extraction_date {
    my $ref = shift;
    # FISH tests not done on D/RNA extraction so can't get a processing date:
    return if $ref->{_section_name} eq 'FISH';

    my $test_name = $ref->{_test_name};
    my $lab_num   = $ref->{local_sample_id};
    my $req_id    = $ref->{internal_request_id};

    # get extraction dates for DNA and/or RNA:
    my $extraction = $extraction_dates->{$req_id};
    say "no extraction for $lab_num $test_name" and return
        unless $extraction;

    # get expected material, RNA if test_name in %rna_tests, otherwise assume
    # DNA, except a small minority of tests done on CD138-selected DNA (see below):
    my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA';
    # get extraction date for test material:
    my $date = $extraction->{$material};
    # some tests (eg MLPA myeloma) are done on CD138-selected DNA -
    # key = CD138DNA in %extraction_dates:
    $date ||= $extraction->{CD138DNA}; # or give up - no extraction date available:
    say "no extraction date for $lab_num $test_name $material" and return
        unless $date;
    return $date;
}

sub get_test_request_date {
    my $ref = shift; # p $ref;

    my $request_id = $ref->{internal_request_id};
    my $registered = $ref->{datetime_sample_received};
    my $test_name  = $ref->{test_name};
    my $screened   = $ref->{_datetime_screened};

    # use manual request date if exists, or auto-request date, or date of screening:
    my $manual_request_date = get_manual_test_request($request_id, $test_name);
    # auto-requested by sample-type, therefore date = registration:
    my $auto_request_date = $registration_lab_tests->{$test_name}
        ? $registered : 0; # zero OK as will be tested for truth below
        # p [$manual_request_date, $auto_request_date, $screened];

    return $manual_request_date || $auto_request_date || $screened;
}

sub get_registration_lab_test_requests {
    my @cols = ( 'DISTINCT t2.field_label', 1 );
    my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2');
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $opt_q; # exit;
    return $dbix->query($sql, @bind)->map;
}

sub get_rna_tests {
    my @rels = (                       'linked_lab_test|llt' ,
        'llt.parent_test_id=lt1.id' => 'lab_tests|lt1'       ,
        'llt.linked_test_id=lt2.id' => 'lab_tests|lt2'       ,
    );
    my @args = (
		-columns  => [ 'lt1.test_name', 1 ],
		-from     => [ -join => @rels ],
		-where    => { 'lt2.test_name' => 'rna_extraction' },
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $opt_q; # exit;
    my $map = $dbix->query( $sql, @bind )->map;
    # add rna_extraction in case extraction tests are required in data return:
    $map->{rna_extraction} = 1;
    return $map;
}

sub get_manual_test_request {
    my ($request_id, $test_name) = @_;

    my @args = (
		-columns  => [ 'time' ],
		-from     => [ 'request_lab_test_history' ],
        -where    => {
            request_id => $request_id,
            action => {
                -rlike => '^(auto-)?requested( linked test)? ' . $test_name
            },
        },
        -order_by => [ '-time' ], # ORDER BY time DESC
        -limit    => 1,           # returns most recent only if >1
    ); # p @args;

    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind) if $opt_q; # exit;
    return $dbix->query($sql, @bind)->value;
}

sub get_ngis_test_code {
    my $ref = shift;

    my $presentation = $ref->{_presentation};
    my $diagnosis_id = $ref->{_diagnosis_id};
    my $lab_test_id  = $ref->{_lab_test_id};
    my $lab_section  = $ref->{_section_name};
    my $test_name    = $ref->{test_name};
    my $diagnosis    = $ref->{_diagnosis} || '';
    my $lab_num      = $ref->{local_sample_id};

  # get NGIS code(s) for this lab-test:
    my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref
    unless ($ngis_test_code) {
        my $tile = "$test_name [$lab_section]";
        $STASH{no_ngis_code}{$tile}++;
        # say "$lab_num: no NGIS id for $test_name";
        return undef;
    }
  # return NGIS test code if it's the only one (even if not supported clinical
  # indication; TODO: this might not be suitable, might need to consider diagnosis):
    return $ngis_test_code->[0] if scalar @$ngis_test_code == 1;

  # now have multiple NGIS id's for lab-test
    # common message for debugging:
    my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!;
    my $str = "$lab_num: $test_name [diagnosis: $diagnosis;"
        . " presentation: $presentation]";

    # vars for try_presentation():
    my @vars_for_presentation = ($test_name, $presentation, $ngis_test_code);

   # now need to examine diagnosis:
    if (! $diagnosis_id) {
        if ( my $ngis_code = try_presentation(@vars_for_presentation) ) {
            # say "found $code from $presentation"
            return $ngis_code;
        }
        # io($log)->appendln("$msg and request is not reported");
        push @{ $STASH{not_reported} }, $str and return undef;
    }
  # get clinical indication for diagnosis (eg AML NOS = M80)
    my $indication = $ref->{_clinical_indication_code}; # p $indication;
    if (! $indication) { # say 'no clinical indication';
        if ( my $ngis_code = try_presentation(@vars_for_presentation) ) {
            # say "found $ngis_code from $presentation";
            return $ngis_code;
        }
        # $STASH{no_clinical_indication}{$diagnosis}++;
        #io($log)->appendln("$msg and '$diagnosis' has no clinical indication id"
        #    . " [presentation: $presentation]");
        push @{ $STASH{no_clinical_indication} }, $str and return undef;
    } # p $indication; p $ngis_ids;

  # return NGIS code from list if has a matching clinical indication:
    for my $ngis_code (@$ngis_test_code) {
        # say "$lab_num: $indication -> $ngis_code" if $ngis_code =~ /^$indication/;
        return $ngis_code if $ngis_code =~ /^$indication/;
    }
  # last resort is to try to find NGIS code from presentation:
    if ( my $ngis_code = try_presentation(@vars_for_presentation) ) {
        # say "found $code from $presentation";
        return $ngis_code;
    }

  # can't find an NGIS code for lab-test from presentation or diagnosis:
    #io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] "
    #    . "did not match any available NGIS test codes [presentation: $presentation]");
    # say "no NGIS id available for $test_name";
    push @{ $STASH{no_available_ngis_code} },
        "$lab_num: $test_name [diagnosis indication $indication [$diagnosis]; "
            . "presentation: $presentation]";
    return undef;
}

sub try_presentation { # try to look for a loose match with presentation term:
    my ($test_name, $presentation, $ngis_test_code) = @_; # str, arrayref, str
         # p $presentation; p $ngis_test_code; p $test_name;
    for my $type ( keys %presentation_types ) { # p $type;
        if ( $presentation =~ /$type/ ) { # eg Suspected MDS =~ /MDS/
            # say "$presentation matches $type";
            for my $ngis_code (@$ngis_test_code) {
                    # p $ngis_code; p $presentation_types{$type};
                if ( $ngis_code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/
                    # say "$ngis_code matches $presentation_types{$type}";
                    return $ngis_code;
                }
            }
        }
    }
    { # if we're still here, no other match possible - try to force NGIS code:
        return $presentation_types{'HTS myeloid'} if $test_name eq 'HTS myeloid';
    }
    return undef;
}

sub get_ngis_lab_tests {
    my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]);
    my %h;
    while ( my $ref = $q->array ) { # p $ref;
        my $lab_test_id = $ref->[0];
        push @{ $h{$lab_test_id} }, $ref->[1];
    }
    return \%h;
}

sub reconstitute_molecular_panels {
    my $data = shift; # p $data;

    # new data file:
    my @new;

    # keep track of request_id:
    my $last_request_id = 0;
    # holder for molucular tests:
    my @tests = (); # will be destroyed and recreated after each request

    ROW:
    for my $row (@$data) {
        my $lab_section = $row->{_section_name};
        my $request_id  = $row->{internal_request_id};
        # if new request, handle molecular tests data, if any:
        if ( $last_request_id && $request_id != $last_request_id ) {
            if ( @tests ) {
                my @rows = molecular_tests_to_panels(\@tests); # p @rows;
                push @new, @rows;
                @tests = (); # reset for next request
            }
        }
        push @new, $row and next ROW
            if $lab_section ne 'Molecular';
        # have molecular data row:
        my $test_name = $row->{test_name};
        push @tests, $row;
        # set last_request_id to current request_id:
        $last_request_id = $request_id;
    } # p %tests;

    return wantarray ? @new : \@new;
}

sub molecular_tests_to_panels {
    my $ref = shift; # AoH's

    my @test_names = map $_->{test_name}, @$ref; # say 'test_names:'; p @test_names;

    # panels ordered in size of array, largest 1st to prevent matches with
    # smaller panels having common members (eg AML 60+ & AML under-60):
    PANEL: for my $t (@$panel_lab_test) { # eg AML 60+ => [NPM1, FLT3 ITD]
        my ($panel_name, $panel_tests) = @$t; # p @$panel_tests;
        # get only those array elements that exist in A and do not exist in B
        # warn $panel_name if intersect(@tests, @$panel_tests);
# TODO: this is wrong - see H29519/18 - matches 'CD5+ BLPD RQ' but only has 1 matching probe
        # if ( my @set = intersect(@test_names, @$panel_tests) ) { # say 'matching set:'; p @set;
            # my @remainder = array_minus(@test_names, @$panel_tests);  p @remainder;
        my $matching_panel = all {
            my $panel_test = $_; any {$_ eq $panel_test} @test_names;
        } @$panel_tests;
        if ( $matching_panel ) {
            my @set = @$panel_tests;

            # remove 1st element in @set into var:
            my $first_element_of_set = shift @set; # say "first_element_of_set: $first_element_of_set";
            # my %seen = (); # only delete test name once (using DELETE_ME flag now)

            # for test_name of 1st entry in $ref matching 1st element of @set,
            # change it to the matched panel_name & delete the rest:
            DATA: for my $d (@$ref) { # p $d; # data href
                next DATA if $d->{DELETE_ME}; # already flagged for deletion
                my $test_name = $d->{test_name}; # p $test_name;
                # say "testing whether $test_name eq $first_element_of_set";
                if ( $test_name eq $first_element_of_set ) {
                    # say "changing $test_name to $panel_name";
                    $d->{test_name} = $panel_name;
                }
                elsif ( grep $test_name eq $_, @set ) {
                    # say "$test_name in @set"; # say 'seen:'; p %seen;
                    # set a flag for later:
                    $d->{DELETE_ME}++;
                    # say "flagging $test_name data for deletion"; # unless $seen{$test_name};
                    # undef $d unless $seen{$test_name}; # only delete once
                    # $seen{$test_name}++; # should be done above but doesn't work!!
                }
            }
        }
    } # p $ref;
    # return original hashrefs not flagged for deletion:
    my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data;
    return wantarray ? @data : \@data;
}

sub get_panel_lab_tests {
    my @cols = (
    	'lt1.field_label|panel',
        'lt2.field_label|test',
    );
    my @rels = (                         'panel_lab_test|plt'  ,
        'plt.panel_test_id=lt1.id'    => 'lab_tests|lt1'       ,
        'plt.lab_test_id=lt2.id'      => 'lab_tests|lt2'       ,
        'lt1.lab_section_id=ls.id'    => 'lab_sections|ls'     ,
    );
    # need to *locally* exclude DNA extraction, if not already excluded:
    my @local_excluded_tests = @excluded_lab_tests; # copy to keep local
    push @local_excluded_tests, 'dna_extraction'
        unless grep { $_ eq 'dna_extraction' } @local_excluded_tests;

    my %where = (
        'ls.section_name' => 'Molecular',
        'lt2.test_name'   => { -not_in => \@local_excluded_tests },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $opt_q; # exit;
    my $query = $dbix->query( $sql, @bind );

    my %h;
    while ( my $ref = $query->hash ) {
        my $panel = $ref->{panel};
        push @{ $h{$panel } }, $ref->{test};
    } # p %h;
    # re-arrange in order of size of panel, largest 1st:
    my @data = map [ +($_ => $h{$_}) ],
        sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data;
    return \@data;
}