P3X GitList

#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
NGIS data extraction; check ngis_* tables up-to-date

runs at 6am on 1st & 3rd Friday of month via cron:
0 6 1-7 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ...
0 6 15-21 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ...

manual run: $0 [-m <months>, -t <testing: saves files locally, doesn't email>,
  -q <query output>, --console <verbose log>, --file <log to file>,
  --cumulative <all requests since 1st of month of ref date>]

maps hilis4 lab tests to an NGIS test code using hilis4.ngis_lab_test table:
if lab_test_id entry(s) exist in ngis_lab_test table:
* if single row then use ngis_test_code value (diagnosis doesn't matter so request
    doesn't need to be reported)
* if lab_test has multiple ngis_test_codes, the diagnosis clinical indication code
    (ngis_diagnosis.ngis_indication_id) is used to determine which (if any)
    ngis_test_code to use, where the diagnosis clinical indication must match
    one of the available ngis test id's eg HTS myeloid test has matching codes
    for a diagnosis of MDS, MPN, etc but not for CML
* if NGIS test code cannot be found on diagnosis, a loose matching on presentation
    is attempted, including previous screen terms used, eg 'MDS' matches
    'Suspected MDS', 'Follow-up MDS'
* last resort is to force NGIS test code onto some test names (eg HTS myeloid)

* recipient does not want technology (FISH, PCR, HTS, etc) so ETV6/RUNX1 (FISH)
    & ETV6-RUNX1 (PCR) potentially ambiguous

* molecular tests are reassembled into panels (if panel has NGIS code)
* FISH panels without NGIS codes are expanded to individual tests

TODO: ngis_test_code list (@$ngis_test_code) with multiple identical indication
codes (eg HTS CLL/MZL, ABL1 kinase p190, etc) - 1st one (in ASCII sort order) is
selected that matches diagnosis indication code, this may not be correct

complications (see geneq_data_extract.log):
* hilis4 lab-tests/panels without ngis test id's - require default codes
* lab tests/panels without a clinical indication mapped to diagnosis or screen
* diagnostic terms without clinical indication (eg 'see comments', 'lab tests only', etc)
* requests not reported when data collected
=cut
#-------------------------------------------------------------------------------

#===============================================================================
my @data_recipients = qw( paul.mcintosh john.fraser raj );
my @log_recipients  = qw( turner talley raj );
#===============================================================================

my $query_output = 0; # --query|q - output sql queries to console
my $log_console  = 0; # --console - verbose log to console
my $verbose_log  = 0; # --verbose - verbose log to file (overrides --console)
my $cumulative   = 0; # --cumulative - all requests since 1st of month of ref_date
my $duration     = 1; # --month|m - default unless specified in command-line opts
my $testing      = 0; # --testing|t - saves file locally, doesn't email

use Getopt::Long;
GetOptions (
    "months|m=i" => \$duration,       # int
    "testing|t"  => \$testing,        # flag
    "query|q"    => \$query_output,   # flag
    "console"    => \$log_console,    # flag
    "verbose"    => \$verbose_log,    # flag
    "cumulative" => \$cumulative,     # flag
); # warn $duration; warn $cumulative; exit;

use strict;
use warnings;
use feature 'say';

my $JUST_TESTING = $testing || 0;

use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;

use IO::All;
use Data::Dumper;
use Clone 'clone';
use Data::Printer;
use List::Compare;
use Log::Any qw($log);
use Log::Any::Adapter;
use SQL::Abstract::More;
use List::Util qw(maxstr);
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);

use lib $Bin . '/../../../lib';
use LIMS::Local::ScriptHelpers;

use lib '/home/raj/perl-lib';
use Local::WriteExcel;
use Local::DB;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

my $config = $tools->config();
my $today  = $tools->time_now;
my $dbix   = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names (now standardised)

$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)

my %STASH; # for logging/debugging

#-------------------------------------------------------------------------------
my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date;
my $filename = sprintf 'GeNEQ_HMDS_data_%s.xls', $ref_date->strftime('%b_%Y'); # warn $filename;
my $subject  = sprintf 'GeNEQ HMDS data %s', $ref_date->strftime('%b %Y');

my $logfile  = sprintf '%s/geneq_data_extract_%s.log',
    $Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y'); # warn $logfile;

# for verbose logging to console (can be set or reset later):
Log::Any::Adapter->set('Stdout') if $log_console;
# for verbose logging to logfile (can be set or reset later):
Log::Any::Adapter->set(File => $logfile) if $verbose_log;

# check cytogenetics
my @lab_sections = (
    'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing',
    'Multiplex Ligation-dependent Probe Amplification'
);
# skip these lab-tests:
my %excluded_lab_tests = (
    FISH => [(
        'Christie Myeloma Fusion-1',
        'Christie Myeloma Fusion-2',
        'Christie Myeloma Screen',
        'Cytocell 3q29 (control)',
        'Cell selection quality',
        'Cytocell 5q32-q33.1',
        'FISH H & E review',
        'Cytocell 7q36.1',
        'Cytocell CEN11',
        'Cytocell CEN12',
        'Cytocell CEN17',
        'Cytocell CEN18',
        'Cytocell CEN6',
        'Cytocell CEN7',
        'Cytocell CEN8',
        'Cytocell CEN9',
        'Christie CLL',
        'Abbott CEN11',
        'Abbott CEN18',
        'Abbott CEN7',
        'Abbott CEN8',
        'PNH myeloid',
        'CLL trial',
        'EBV ISH',
     )],
    Molecular => [(
        '100KG DNA extraction',
        'Sanger sequencing',
        'Refer material',
        'DNA control',
        'ABL control',
    )],
);
# tests which are confirmatory and should not be included:
push @{ $excluded_lab_tests{FISH} }, 'Zytolight FGFR3';
push @{ $excluded_lab_tests{Molecular} }, ('SRSF2', 'ASXL1');
# tests not currently on NGIS test directory (needs to be adaptable):
push @{ $excluded_lab_tests{FISH} }, ('ATM', 'Cytocell ATM');
push @{ $excluded_lab_tests{Molecular} }, # panels:
    ('Burkitt mutation', 'CNS/testicular DLBCL'); # p %excluded_lab_tests;

{ # may need to exclude D/RNA extractions in future:
    my @extractions = ( 'DNA extraction', 'RNA extraction', 'CD138+ DNA' );
    # push @{ $excluded_lab_tests{Molecular} }, @extractions;
}
# exclude all quantification & store D/RNA tests:
my $excluded_test_name_re = 'quantification|store_[dr]na';

my @excluded_parent_codes = (
	'RWM', 	  # Cardiff
	'RVA', 	  # Carmarthen
	'RT8', 	  # Conwy
	'RVF', 	  # Gwent
	'RT9', 	  # NE Wales
	'RT7', 	  # NW Wales
	'RVC', 	  # Swansea
	'RQF', 	  # Velidre, Wales
	'8EQ15',  # Drs Lab
	'MSDMDH', # Mater Dei
	'X99999', 'V81999' # unknowns
);

# look for loose match with presentation term:
my %presentation_types = (
    AML       => 'M80',
    CML       => 'M84',
    MDS       => 'M82',
    MPN       => 'M85',
    PNH       => 'M82',
    Hodgkin   => 'M95',
    'B-ALL'   => 'M91',
    myeloma   => 'M92', # case-sensitive
    Aplastic  => 'M82',
    Cytopenia => 'M82', # MDS
    # forcing lymphomas to clinical indication code for high-grade lymphoma (M99):
    # TODO: maybe better to force to prehistoric term B-NHL ??
    'Probable B-LPD'     => 'M99',
    'Suspected lymphoma' => 'M99',
    'Suspected B-cell lymphoma' => 'M99',
);

# force specific NGIS test codes (last resort) - TODO: replace with defaults for all tests:
my %forced_ngis_codes = (
    'BCR-ABL-tests' => 'M84.1', # CML (regex match in try_presentation())
    'RUNX1-RUNX1T1' => 'M80.7', # AML
    'CBFbeta MYH11' => 'M80.7', # AML
    'HTS myeloid'   => 'M82.1', # MDS
    'CytoSNP'       => 'M82.2', # MDS
);

# molecular lab-tests done on RNA (the rest done on DNA):
my $rna_tests = do {
    my @skipped_tests = ( 'FLT3 ITD', 'FLT3 TKD', 'NPM1' ); # can be done on both DNA & RNA
    get_rna_tests(\@skipped_tests);
}; # p $rna_tests; exit;

# get all panel tests (omitting excluded tests) for FISH & Moelcular sections:
my $panel_lab_test = get_panel_lab_tests(['FISH','Molecular']); # p $panel_lab_test; exit;

# get lab-tests auto-requested at registration (discontinued):
# my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests;

# get list of section lab test id's (for inter-converting tests & panels):
my $section_lab_test_id = get_section_lab_tests(['FISH', 'Molecular']); # p $section_lab_test_id;

# get mapped HILIS4 lab_test.id => NGIS test code:
my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests;
#-------------------------------------------------------------------------------

my ($sql, @bind) = _get_main_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];

my $query = $dbix->query( $sql, @bind );

# get cols from query, except 'private' ones used for evaluations (eg _datetime_screened):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');

my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);

runtimer('commencing data processing');

# get request_ids and DNA/RNA extraction dates into hash map:
my $extraction_dates = do {
    my @request_ids = map $_->{internal_request_id}, @data;
    get_extraction_dates_for_requests(\@request_ids);
}; # io('extraction_dates.txt')->appendln($_) for keys %$extraction_dates;

# pre-process data to reconstitute (some) molecular panels:
runtimer('reconstituting molecular panels');
@data = reconstitute_molecular_panels(\@data); # p @data;

# pre-process data to split (some) FISH panels (too complex to combine with above):
runtimer('splitting FISH panels');
@data = split_fish_panels(\@data); # p @data;
runtimer('finished pre-processing data');

# process @data:
for my $ref (@data) {
    my $data = process_data($ref); # returns arrayref
	$xl->write_row($data);
}
runtimer('completed data processing');

if ($JUST_TESTING) {
    $xl->save(join '/', $Bin, $filename);
}
else {
    my %mail = (
        attachment => $xl->data,
        filename   => $filename,
        subject    => $subject,
        config     => $config,
    );
    $tools->send_mail(\%mail, \@data_recipients);
}

# log all misfits:
_do_log();

#===============================================================================

sub runtimer {
    return unless $JUST_TESTING;
    say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday];
}

sub debug {
    return unless $JUST_TESTING;
    say $_[0];
}

sub process_data {
    my $ref = shift; # href

    # NGIS test code:
    $ref->{test_id} = get_ngis_test_code($ref);

    # determine date lab-test requested (replaced by datetime_order_received):
    # $ref->{datetime_test_requested} = get_test_request_date($ref); # will need to be revived

    # set datetime_testing_complete to test completion datetime:
    $ref->{datetime_testing_complete} = $ref->{_datetime_test_completed};

    { # set datetime_reporting_complete to later of testing_complete or authorisation:
        if ( $ref->{_datetime_authorised} ) {
            my @dates = @{$ref}{ qw/_datetime_authorised _datetime_test_completed/ };
            # dates in yyyy-mm-dd format can be handled as strings for determination
            # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
            $ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util
        }
        else { # not authorised, set to _datetime_test_completed:
            $ref->{datetime_reporting_complete} = $ref->{_datetime_test_completed}
        }
    }
=begin # datetime_processing_complete now set to D/RNA extraction date
    # set datetime_processing_complete to later of authorisation date or test
    # completion date; value will be null if request not yet authorised:
        # p $ref->{sample_processed_date}; p $ref->{_datetime_authorised};
    if ( $ref->{_datetime_authorised} ) {
        my @dates = @{$ref}{ qw/_datetime_authorised datetime_testing_complete/ };
        # dates in yyyy-mm-dd format can be handled as strings for determination
        # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
        $ref->{datetime_processing_complete} = maxstr(@dates); # List::Util
    } # else sample_completed_date remains null
=cut
    $ref->{datetime_processing_complete} = get_processing_date($ref);

    # new template wants some cols temporarily NULL (commented out in query now):
    my @null_fields = qw//;
    $ref->{$_} = undef for @null_fields;

    return [ @{$ref}{@headers} ];
}

sub _main_query_cols {
    my @cols = (
        'CONCAT_WS("/",  r.request_number, r.year - 2000)|local_sample_id',
#        'NULL|local_sample_id_sub',
#        'NULL|local_dna_number',
#        'NULL|local_dna_number_sub',
        'p.id|internal_patient_id',
        'r.id|internal_request_id',
        q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|testing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!,
#        'NULL|ped_number',
#        'p.first_name|patient_first_name',
#        'p.last_name|patient_last_name',
#        'p.dob|patient_dob',
        'p.gender|patient_sex',
#        'p.nhs_number',
#        'pd.address|patient_address',
#        'NULL|patient_district_of_residence',
        'NULL|patient_city',
#        'pd.post_code|patient_postcode',
#        'NULL|patient_country',
        'ref.national_code|referrer_ods_code',
#        'NULL|referrer_email',
#        'NULL|referrer_telephone',
#        'NULL|referrer_title',
#        'NULL|referrer_first_name',
#        'ref.name|referrer_last_name',
        'hd.display_name|referrer_specialty',
#        'rs.display_name|referring_facility_name',
#        'NULL|referrer_address',
#        'NULL|referrer_city',
#        'NULL|referrer_district',
#        'NULL|referrer_postcode',
#        'NULL|referrer_county',
        'rs.organisation_code|referring_facility_ods_code',
#        'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b
#        'NULL|other_unit_number',
#        'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b
        q!"Routine"|test_priority!,
        'NULL|test_id', # NGIS test id (eg M80.6)
        'lt.field_label|test_name',
#        'ls.section_name|lab_section',
        'NULL|ngis_id', # NGIS order number ?
#        'NULL|datetime_test_requested', # replaced by datetime_order_received
#        'lt.test_type|request_comments', # panel or test
        q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ')
            AS sample_type_received!,
        'rsd.specimen_date|datetime_sample_collected',  # datetime
        'r.created_at|datetime_sample_received',        # datetime
        'r.created_at|datetime_order_received',         # datetime
        'NULL|datetime_processing_complete',            # set later
        'NULL|datetime_testing_complete',               # set later
        'NULL|datetime_reporting_complete',             # set later
        'rsd.specimen_quality|sample_condition',
#        q!MAX( CASE WHEN ao.option_name = 'doi'
#            THEN 'yes' ELSE 'no' END ) AS infection_risk!,
#        'NULL|sample_type_dispatched',
#        'IF( rst.volume, "ul", NULL )|sample_units',
#        'rst.volume|sample_amount',
#        q!IF( rst.concentration < 9999, rst.concentration, NULL )
#            AS dna_concentration!,
#        'NULL|sample_date_sent',
#        'rst.vialId|sample_tube_id',
#        'sr.plateId|sample_rack_id',
#        'NULL|sample_courier',
#        'NULL|sample_courier_ref',

#        'NULL|sample_processed_date',  # renamed datetime_processing_complete
#        'NULL|sample_completed_date',  # renamed datetime_testing_complete

    # fields used for data processing only (underscored for omission in @headers):
        'lt.id|_lab_test_id',
        'lt.test_type|_test_type',
        'lt.test_name|_test_name',
        'ls.section_name|_section_name',
        'd.name|_diagnosis',
        'nci.code|_clinical_indication_code',
        's2.description|_presentation',
        'rrv.diagnosis_id|_diagnosis_id',
		# q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END )
        #    AS _datetime_screened!, # not used
		# q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END )
        #    AS _datetime_reported!, # not used
		q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END )
            AS _datetime_authorised!,
        'ts.time|_datetime_test_completed', # date lab-test set to complete
    );
    return wantarray ? @cols : \@cols;
}

sub _get_main_query {
    my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;
    my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month

    my %date_restriction = $cumulative # if --cumulative Getopt:
        ? ( '>=', $begin_date->ymd ) # all since ref_date
        : ( -between => [ # first & last day of ref_date month:
                $begin_date->ymd, # 1st day
                $begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day
            ] );
    my $excluded_lab_test_ids = get_excluded_lab_test_ids();

    my $cols = _main_query_cols(); # these keep changing
    my @rels = (                                'requests|r'                   ,
        q{r.patient_case_id=pc.id}           => 'patient_case|pc'              ,
        q{pc.patient_id=p.id}                => 'patients|p'                   ,
        q{pc.referral_source_id=rs.id}       => 'referral_sources|rs'          ,
        q{rs.parent_organisation_id=po.id}   => 'parent_organisations|po'      ,
        q{r.referrer_department_id=rd.id}    => 'referrer_department|rd'       ,
        q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd'      ,
        q{rd.referrer_id=ref.id}		     => 'referrers|ref'                ,
        q{rsd.request_id=r.id}               => 'request_specimen_detail|rsd'  ,
        q{rsp.request_id=r.id}               => 'request_specimen|rsp'         ,
        q{rsp.specimen_id=s.id}              => 'specimens|s'                  ,
        q{ts.request_id=r.id}                => 'request_lab_test_status|ts'   ,
        q{ts.status_option_id=so.id}         => 'lab_test_status_options|so'   ,
        q{ts.lab_test_id=lt.id}              => 'lab_tests|lt'                 ,
        q{lt.lab_section_id=ls.id}           => 'lab_sections|ls'              ,
        q{rsv.request_id=r.id}               => 'request_status_view|rsv'      ,
        q{ris.request_id=r.id}               => 'request_initial_screen|ris'   ,
        q{ris.screen_id=s2.id}               => 'screens|s2'                   ,
      # left joins:
        q{=>rrv.request_id=r.id}             => 'request_report_view|rrv'      ,
        q{=>rrv.diagnosis_id=d.id}           => 'diagnoses|d'                  ,
        q{=>nd.diagnosis_id=d.id}            => 'ngis_diagnosis|nd'            ,
        q{=>nd.ngis_indication_id=nci.id}    => 'ngis_clinical_indications|nci',
        q{=>ro.request_id=r.id}              => 'request_option|ro'            ,
        q{=>ro.option_id=ao.id}              => 'additional_options|ao'        ,
        q{=>pd.patient_id=p.id}              => 'patient_demographics|pd'      ,
        q{=>rst.request_id=r.id}             => 'request_storage|rst'          ,
        q{=>rst.rack_id=sr.id}               => 'storage_racks|sr'             ,
        q{=>rt.request_id=r.id}              => 'request_trial|rt'             ,
    );
    my %where = (
        -and => [ # repeated elements (eg parent_code), so requires arrayref
            'rt.request_id'        => undef,      # not in clinical trial
            'so.description'       => 'complete', # lab-test status
            'ls.section_name'      => { -in => \@lab_sections },
            'rs.organisation_code' => { -not_rlike => '^NT' },    # private hospital
            'po.parent_code'       => { -not_rlike => '^(S|Z)' }, # scotland/ireland
            'po.parent_code'       => { -not_in => \@excluded_parent_codes },
            'DATE(ts.time)'        => \%date_restriction,
            'lt.test_name'         => { -not_rlike => $excluded_test_name_re },
            'lt.id'                => { -not_in    => $excluded_lab_test_ids },
            # to restrict to specific request id's:
            # 'r.id'               => { -in => [395682] },
        ],
    );
    my @args = (
		-columns  => $cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
        -group_by => [ 'lt.id', 'r.id' ],
        -having   => { $is_private => undef }, # not private patient
        -order_by => 'r.id',
        #-limit => 100,
        #-offset => 100,
    ); # p @args;
    # need to supply our own injection_guard to override built-in sql injection
    # attack prevention which detects ';' in GROUP_CONCAT, but we don't need one
    # here as we have no user input:
    my $sqla = SQL::Abstract::More->new( injection_guard => qr/^$/ );
    my ($sql, @bind) = $sqla->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return ($sql, @bind);
}

sub get_excluded_lab_test_ids {
    my @test_ids;

    my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
    my %args = (
        -columns  => 'lt.id',
        -from     => [ -join => @rels ],
    );
    while ( my($section, $ref) = each %excluded_lab_tests ) {
        $args{'-where'} = {
            'ls.section_name' => $section,
            'lt.field_label'  => { -in => $ref },
        };
        my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
            $dbix->dump_query($sql, @bind) if $query_output; # exit;
        my @ids = $dbix->query($sql, @bind)->column; # ref
        push @test_ids, @ids;
    } # p @test_ids;
    return wantarray ? @test_ids : \@test_ids;
}

sub get_extraction_dates_for_requests {
    my $request_ids = shift;

    my @cols = (
        'ts.request_id',
        q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/,
        'ts.time',
    );
    my @rels = (                       'request_lab_test_status|ts',
        'ts.lab_test_id=lt.id'      => 'lab_tests|lt'              ,
        'ts.status_option_id=so.id' => 'lab_test_status_options|so',
    );
    my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna );
    my %where = (
        'so.description' => 'complete',
        'lt.test_name'   => { -in => \@extraction_tests },
        'ts.request_id'  => { -in => $request_ids },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $query = $dbix->query( $sql, @bind );

    my %h;
    while ( my $ref = $query->hash ) {
        my $request_id = $ref->{request_id};
        my $datetime   = $ref->{time};
        my $type       = $ref->{type};

        $h{$request_id}{$type} = $datetime;
    }
    return \%h;
}

sub get_processing_date {
    my $ref = shift;
    # FISH tests not done on D/RNA extraction so can't get a processing date:
    return if $ref->{_section_name} eq 'FISH';

    my $test_name = $ref->{_test_name}; # p $test_name;
    my $lab_num   = $ref->{local_sample_id};
    my $req_id    = $ref->{internal_request_id}; # p $req_id;

    # get extraction dates for DNA and/or RNA:
    my $extraction = $extraction_dates->{$req_id}; # p $extraction;
    debug("no extraction for $lab_num $test_name") and return
        unless $extraction;

    # get expected material, RNA if test_name in %rna_tests, otherwise assume
    # DNA, except a small minority of tests done on CD138-selected DNA (see below):
    my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA';
    # get extraction date for test material:
    my $date = $extraction->{$material};
    # some tests (eg MLPA myeloma) are done on CD138-selected DNA -
    # key = CD138DNA in %extraction_dates:
    $date ||= $extraction->{CD138DNA}; # or give up - no extraction date available:
    debug("no extraction date for $lab_num $test_name $material") and return
        unless $date;
    return $date;
}

=begin # not used - replaced by datetime_order_received
sub get_test_request_date {
    my $ref = shift; # p $ref;

    my $request_id = $ref->{internal_request_id};
    my $registered = $ref->{datetime_sample_received};
    my $test_name  = $ref->{test_name};
    my $screened   = $ref->{_datetime_screened};

    # use manual request date if exists, or auto-request date, or date of screening:
    my $manual_request_date = get_manual_test_request($request_id, $test_name);
    # auto-requested by sample-type, therefore date = registration:
    my $auto_request_date = $registration_lab_tests->{$test_name}
        ? $registered : 0; # zero OK as will be tested for truth below
        # p [$manual_request_date, $auto_request_date, $screened];

    return $manual_request_date || $auto_request_date || $screened;
}
=cut

=begin # not used:
sub get_registration_lab_test_requests {
    my @cols = ( 'DISTINCT t2.field_label', 1 );
    my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2');
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query($sql, @bind)->map;
}
=cut

sub get_rna_tests {
    my $skipped_tests = shift;
    my @rels = (                       'linked_lab_test|llt' ,
        'llt.parent_test_id=lt1.id' => 'lab_tests|lt1'       ,
        'llt.linked_test_id=lt2.id' => 'lab_tests|lt2'       ,
    );
    my @args = (
		-columns  => [ 'lt1.test_name', 1 ],
		-from     => [ -join => @rels ],
		-where    => {
            'lt1.field_label' => { -not_in => $skipped_tests },
            'lt2.test_name'   => 'rna_extraction',
        },
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $map = $dbix->query( $sql, @bind )->map;
    # add rna_extraction in case extraction tests are required in data return:
    $map->{rna_extraction} = 1;
    return $map;
}

=begin # called from get_test_request_date(), not used
sub get_manual_test_request {
    my ($request_id, $test_name) = @_;

    my @args = (
		-columns  => [ 'time' ],
		-from     => [ 'request_lab_test_history' ],
        -where    => {
            request_id => $request_id,
            action => {
                -rlike => '^(auto-)?requested( linked test)? ' . $test_name
            },
        },
        -order_by => [ '-time' ], # ORDER BY time DESC
        -limit    => 1,           # returns most recent only if >1
    ); # p @args;

    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query($sql, @bind)->value;
}
=cut

sub get_ngis_test_code {
    my $ref = shift; # p $ref if $ref->{test_name} =~ /Burkitt mutation/;

    my $presentation = $ref->{_presentation};
    my $diagnosis_id = $ref->{_diagnosis_id};
    my $lab_test_id  = $ref->{_lab_test_id};
    my $lab_section  = $ref->{_section_name};
    my $test_name    = $ref->{test_name};       # say $test_name;
    my $test_type    = $ref->{_test_type};
    my $diagnosis    = $ref->{_diagnosis} || '';
    my $lab_num      = $ref->{local_sample_id};

    my $presentation_diagnosis = join '::', $presentation,
        $diagnosis || '[NOT REPORTED]'; # p $presentation_diagnosis;

  # get NGIS code(s) for this lab-test:
    my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref
    unless ($ngis_test_code) {
        my $title =join ' :: ', $test_name, $test_type, $lab_section;
        $STASH{no_ngis_code}{$title}++;

        # debug("$lab_num: no NGIS id for $test_name");
        return $test_name !~ /[DR]NA extraction|CD138\+ DNA/ # return undef if extraction
            ? $presentation_diagnosis
            : undef;
    }
  # return NGIS test code if it's the only one (even if not supported clinical
  # indication; TODO: this might not be suitable, might need to consider diagnosis):
    return $ngis_test_code->[0] if scalar @$ngis_test_code == 1;

    my $all_test_codes = join ', ', @$ngis_test_code; # for log

  # now have multiple NGIS id's for lab-test
    # common message for debugging:
    my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!;

    # vars for try_presentation():
    my %vars_for_presentation = ( data => $ref, ngis_code => $ngis_test_code );

   # now need to examine diagnosis:
    if (! $diagnosis_id) {
        if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
            # debug("found $ngis_code from $presentation");
            return $ngis_code;
        }
        # io($log)->appendln("$msg and request is not reported");
        push @{ $STASH{unreported} }, [ $lab_num, $test_name, $presentation ];
        return $presentation_diagnosis;
    }
  # get clinical indication for diagnosis (eg AML NOS = M80)
    my $indication = $ref->{_clinical_indication_code}; # p $indication;
    if (! $indication) { # debug('no clinical indication');
        if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
            # debug("found $ngis_code from $presentation");
            return $ngis_code;
        }
        #io($log)->appendln("$msg and '$diagnosis' has no clinical indication id"
        #    . " [presentation: $presentation]");
        push @{ $STASH{no_clinical_indication} },
            [ $lab_num, $test_name, $presentation, $diagnosis ];
        return $presentation_diagnosis;
    } # p $indication; p $ngis_ids;

  # return NGIS code from list if has a matching clinical indication:
    for my $ngis_code (@$ngis_test_code) { # warn $ngis_code;
        # debug("$lab_num: $indication -> $ngis_code") if $ngis_code =~ /^$indication/;
        return $ngis_code if $ngis_code =~ /^$indication/;
    }
  # last resort is to try to find NGIS code from presentation:
    if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
        # debug("found $ngis_code from $presentation");
        return $ngis_code;
    }

  # can't find an NGIS code for lab-test from presentation or diagnosis:
    #io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] "
    #    . "did not match any available NGIS test codes [presentation: $presentation]");
    # debug("no NGIS id available for $test_name");
    push @{ $STASH{no_available_ngis_code} }, [ $lab_num, $test_name,
        $all_test_codes, $diagnosis, $indication, $presentation ];
    # return undef;
    return $presentation_diagnosis;
}

sub try_presentation { # try to look for a loose match with presentation term:
    my $args = shift;

    my $presentation = $args->{data}->{_presentation};
    my $request_id   = $args->{data}->{internal_request_id};
    my $ngis_codes   = $args->{ngis_code};
    my $test_name    = $args->{data}->{test_name};
       # p $presentation; p $ngis_codes; p $test_name;

    # get any previous screening terms from history file:
    my @all_screens = get_request_history($request_id);
    push @all_screens, $presentation; # p @all_screens;

    for my $screen (@all_screens) {
        for my $type ( keys %presentation_types ) { # p $type; # eg AML, MDS
            if ( $screen =~ /$type/ ) { # eg Suspected MDS =~ /MDS/
                # debug("$screen matches $type");
                for my $code (@$ngis_codes) { # eg M84.2
                        # p $code; p $presentation_types{$type};
                    if ( $code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/
                        # debug("$code matches $presentation_types{$type}");
                        return $code;
                    }
                }
            }
        }
    }
    { # if we get this far, no other match possible, force NGIS code if test
        # configured in %forced_ngis_codes:
        for my $test ( keys %forced_ngis_codes ) {
            return $forced_ngis_codes{$test} if $test_name eq $test;
        }
        # BCR-ABL p210 RQ or BCR-ABL multiplex:
        return $forced_ngis_codes{'BCR-ABL-tests'} if $test_name =~ /BCR-ABL/;
    }
    return undef;
}

sub get_request_history {
    my $request_id = shift; # p $request_id;

    my $query = $dbix->select('request_history', 'action',
        {
            request_id => $request_id,
            action     => { -like => 'updated initial screen entry %' }
        }
    );
    my @screens;
    while ( my $action = $query->value ) { # p $action;
        if ( $action =~ /updated initial screen entry (.*)/ ) {
            push @screens, $1;
        }
    }
    return wantarray ? @screens : \@screens;
}

sub get_ngis_lab_tests {
    my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]);
    my %h;
    while ( my $ref = $q->array ) { # p $ref;
        my $lab_test_id = $ref->[0];
        push @{ $h{$lab_test_id} }, $ref->[1];
    }
    return \%h;
}

sub split_fish_panels { # only some FISH panels (those without an NGIS map):
    my $data = shift; # p $data;

    my $required_fish_panels = get_fish_panel_names_without_ngis_codes(); # p $required_fish_panels;
    my @fish_panel_lab_tests = @{ $panel_lab_test->{FISH} };

    my %non_ngis_fish_panels;
    # skip panels from $fish_panel_lab_test with NGIS codes:
    for my $ref (@fish_panel_lab_tests) { # AoA
        my $panel_name = $ref->[0]; # p $panel_name;
        next unless grep { $panel_name eq $_ } @$required_fish_panels;
        # debug("$panel_name in required_fish_panels");
        $non_ngis_fish_panels{$panel_name} = $ref->[1];
    } # p %non_ngis_fish_panels;

    # coderef for substituting test names (eg  "DLBCL + FL" panel -> "DAKO MYC")
    my $substitute_values = sub {
        my ($row, $new_test_name) = @_;
        $row->{test_name} = $new_test_name;
        # also need to change lab_test_id for finding NGIS test code:
        $row->{_lab_test_id} = $section_lab_test_id->{FISH}->{$new_test_name};
        $row->{_test_type}   = 'test'; # for log in case it's unmapped
    };

    ROW:
    for my $row (@$data) {
        my $test_name = $row->{test_name};
        my $lab_num   = $row->{local_sample_id};
        # if test name matches one of FISH panels without an NGIS code:
        if ( grep $test_name eq $_, keys %non_ngis_fish_panels ) {
            # get FISH panel test names:
            my @panel_tests = @{ $non_ngis_fish_panels{$test_name} };
            { # change test_name to 1st element of $panel_tests
                my $new_test_name = shift @panel_tests;
                debug("$lab_num: changing $test_name to $new_test_name");
                &$substitute_values($row,$new_test_name);
            }
            # now clone $row for each remaining element in @panel_tests:
            TEST:
            for my $t (@panel_tests) {
                my $new = clone $row;
                # repeat above substitutions:
                debug("$lab_num: adding new row with test-name $t");
                &$substitute_values($new,$t);
                push @$data, $new;
            }
        }
    }
    # re-sort by lab_number (@data now out of sequence after pushing new rows)
    my @new = sort { $a->{internal_request_id} <=> $b->{internal_request_id} }
        @data;
    return wantarray ? @new : \@new;
}

sub get_fish_panel_names_without_ngis_codes {
    my @rels = (                       'lab_tests|lt',
        'lt.lab_section_id=ls.id'   => 'lab_sections|ls',
        '=>nlt.lab_test_id=lt.id'   => 'ngis_lab_test|nlt'
    );
    my %where = (
        'ls.section_name' => 'FISH',
	    'lt.test_type'    => 'panel',
        'lt.field_label'  => { -not_in => $excluded_lab_tests{FISH} },
        'nlt.lab_test_id' => undef,
    );
    my @args = (
		-columns  => 'lt.field_label',
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query( $sql, @bind )->column;
}

sub reconstitute_molecular_panels {
    my $data = shift; # p $data;

    # new data file:
    my @new;

    # keep track of request_id:
    my $last_request_id = 0;
    # holder for molecular tests data:
    my @molecular_tests = (); # will be destroyed and recreated after each request

    ROW:
    for my $row (@$data) {
        my $lab_section = $row->{_section_name};
        my $request_id  = $row->{internal_request_id};
        # if new request, handle molecular tests data, if any:
        if ( @molecular_tests && $request_id != $last_request_id ) {
            $log->info("molecular tests for $last_request_id:",
                { set => [ map $_->{test_name}, @molecular_tests ] } );
            my @rows = molecular_tests_to_panels(\@molecular_tests); # p @rows;
            push @new, @rows;
            @molecular_tests = (); # reset for next request
        }
        push @new, $row and next ROW if $lab_section ne 'Molecular';
        # have molecular data row:
        push @molecular_tests, $row;
        # set last_request_id to current request_id:
        $last_request_id = $request_id;
    } # p %tests;
    return wantarray ? @new : \@new;
}

sub molecular_tests_to_panels {
    my $ref = shift; # AoH's

    my @molecular_panel_lab_tests = @{ $panel_lab_test->{Molecular} };
    my @all_molecular_tests       = map $_->{test_name}, @$ref;
        # say 'test_names:'; p @all_molecular_tests;

    # panels ordered in size of array, largest 1st to prevent matches with
    # smaller panels having common members (eg AML 60+ & AML under-60):
    PANEL:
    for my $t (@molecular_panel_lab_tests) { # eg AML 60+ => [NPM1, FLT3 ITD]
        my ($panel_name, $panel_tests) = @$t; # p @$panel_tests;
        # don't need ordered list, use -u flag for speed boost:
        my $lc = List::Compare->new('-u', $panel_tests, \@all_molecular_tests);
        # if this panel list is a subset of all_molecular_tests:
        if ( $lc->is_LsubsetR ) { # is left-a-subset-of-right ?
            $log->info("$panel_name panel found in molecular set:",
                { panel_tests => $panel_tests } );

            my @set = @$panel_tests; # localise for element substitutions
            my %seen = (); # reset test name stash for this panel

            # remove 1st element in @set into a variable:
            my $first_element_of_set = shift @set;
                # debug("first_element_of_set: $first_element_of_set");

            # for test name of 1st entry in $ref matching 1st element of @set,
            # change it to the matched panel_name & flag the rest for deletion:
            DATA:
            for my $d (@$ref) { # p $d; # data href
                next DATA if $d->{DELETE_ME}; # already flagged for deletion
                my $test_name = $d->{test_name}; # p $test_name;
                my $lab_num   = $d->{local_sample_id};
                    # debug("$lab_num: testing whether $test_name eq $first_element_of_set");
                if ( $test_name eq $first_element_of_set ) {
                    $log->info("changing $test_name to $panel_name");
                     debug("$lab_num: changing $test_name to $panel_name");
                    $d->{test_name} = $panel_name;
                    $d->{_test_type} = 'panel'; # for log in case it's unmapped
                    $d->{_lab_test_id} #  for finding NGIS test code:
                        = $section_lab_test_id->{Molecular}->{$panel_name};
                 }
                elsif ( grep $test_name eq $_, @set ) {
                    # set a flag for later (only once per test in case it's
                    # re-requested or is part of another panel):
                    next DATA if $seen{$test_name}++;
                    $log->info("$test_name is in $panel_name set, flagging for deletion");
                    # debug("$test_name is in $panel_name set, flagging for deletion")
                    $d->{DELETE_ME}++;
                }
            }
        }
    } # p $ref;
    $log->info('=' x 30);
    # return original hashrefs not flagged for deletion:
    my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data;
    return wantarray ? @data : \@data;
}

sub get_section_lab_tests {
    my $lab_sections = shift; # aref

    my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
    my %args = (
		-columns  => [ qw/lt.field_label lt.id/ ],
		-from     => [ -join => @rels ],
		-where    => { 'ls.section_name' => undef }, # defined in block
    ); # p @args;
    my %h;
    for my $section (@$lab_sections) {
        $args{'-where'}{'ls.section_name'} = $section;
        my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
             $dbix->dump_query($sql, @bind) if $query_output; # exit;
        $h{$section} = $dbix->query( $sql, @bind )->map;
    } # p \%h;
    return \%h;
}

sub get_panel_lab_tests {
    my $lab_section = shift; # aref

    my @cols = (
    	'lt1.field_label|panel',
        'lt2.field_label|test',
    );
    my @rels = (                         'panel_lab_test|plt'  ,
        'plt.panel_test_id=lt1.id'    => 'lab_tests|lt1'       ,
        'plt.lab_test_id=lt2.id'      => 'lab_tests|lt2'       ,
        'lt1.lab_section_id=ls.id'    => 'lab_sections|ls'     ,
    );
    my %panel_tests;
    for my $section (@$lab_section) {
        # need to *locally* exclude DNA extraction, if not already excluded:
        my @local_excluded_tests = @{ $excluded_lab_tests{$section} };
        push @local_excluded_tests, 'DNA extraction'
            unless grep { $_ eq 'DNA extraction' } @local_excluded_tests;

        my %where = (
            'lt1.field_label' => { -not_in => \@local_excluded_tests },
            'lt2.field_label' => { -not_in => \@local_excluded_tests },
            'ls.section_name' => $section,
            'lt1.is_active'   => 'yes',
        );
        my @args = (
            -columns  => \@cols,
            -from     => [ -join => @rels ],
            -where    => \%where,
        ); # p @args;
        my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
             $dbix->dump_query($sql, @bind) if $query_output; # exit;
        my $query = $dbix->query( $sql, @bind );

        my %h;
        while ( my $ref = $query->hash ) {
            my $panel = $ref->{panel};
            push @{ $h{$panel } }, $ref->{test};
        } # p %h;
        # re-arrange in order of size of panel, largest 1st:
        my @data = map [ +($_ => $h{$_}) ],
            sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data;
        $panel_tests{$section} = \@data;
    }
    return \%panel_tests;
}

sub test_name_order { # p $a; p $b;
    my ($req_num_a, $yr_a) = split '/', $a->[0];
    my ($req_num_b, $yr_b) = split '/', $b->[0];

    my $test_name_a = $a->[1];
    my $test_name_b = $b->[1];

    return $test_name_a cmp $test_name_b
        || $yr_a <=> $yr_b || $req_num_a <=> $req_num_b;
}

sub _do_log { # p %STASH;
    my $no_ngis_code = $STASH{no_ngis_code};
    my $unreported   = $STASH{unreported};
    my $no_clinical_indication = $STASH{no_clinical_indication};
    my $no_available_ngis_code = $STASH{no_available_ngis_code};

    my $filename = sprintf '%s/geneq_data_extract_log_%s.xlsx',
        $Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y');
    my $xl = Local::WriteExcel->new( filename => $filename ); # p $xl;

    if ($no_clinical_indication) {
        $xl->worksheet_name('no clinical indication');
        my @cols = qw( lab_num test_name presentation diagnosis );
        $xl->write_bold_row(\@cols);
        $xl->write_row($_) for @$no_clinical_indication;
    }
    if ($no_available_ngis_code) {
        $xl->worksheet_name('no available ngis test-code');
        my @cols = qw( lab_num test_name ngis_codes diagnosis
            clinical_indication presentation );
        $xl->write_bold_row(\@cols);
        $xl->write_row($_) for @$no_available_ngis_code;
    }
    if ($unreported) {
        $xl->worksheet_name('unreported');
        $xl->write_bold_row([ qw/lab_num test_name presentation/] );
        $xl->write_row($_) for @$unreported;
    }
    if ($no_ngis_code) { # p $no_ngis_code;
        $xl->worksheet_name('no ngis test-code');
        $xl->write_bold_row([ qw/test_name test_type lab_section count/ ] );
        $xl->write_row([ (split ' :: ', $_ ), $no_ngis_code->{$_} ])
            for sort keys %{ $no_ngis_code };
    }

    if ($JUST_TESTING) {
        $xl->save();
    }
    else {
        my $subject = 'GeNEQ data extraction log '
            . $ref_date->strftime('%b %Y'); # p $subject;
        # capture filename from its full path:
        $filename =~ /(geneq_data_extract_log_.*)/; # p $1;

        my %mail = (
            attachment => $xl->data,
            filename   => $1, # captured from above
            subject    => $subject,
            config     => $config,
        );
        $tools->send_mail(\%mail, \@log_recipients);
    }
    { # text file:
        # log summary data to $logfile, override any previous adapter settings
        Log::Any::Adapter->set(File => $logfile);

        $log->info('DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION');
        $log->info(join ' :: ', @$_)
            for sort test_name_order @$no_clinical_indication;

        $log->info('DIAGNOSIS WITH NO AVAILABLE NGIS CODE & PRESENTATION '
                . 'WITH NO CLINICAL INDICATION');
        $log->info(join ' :: ', @$_)
            for sort test_name_order @$no_available_ngis_code;

        $log->info('UNREPORTED REQUESTS');
        $log->info(join ' :: ', @$_) for @$unreported;

        $log->info('LAB TESTS WITH NO NGIS ID');
        $log->info(join ':: ', $_, $no_ngis_code->{$_})
            for sort keys %{ $no_ngis_code };
    }
}