RSS Git Download  Clone
Raw Blame History
#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
NGIS data extraction; check ngis_* tables up-to-date

runs at 6am on 1st & 3rd Friday of month via cron:
0 6 1-7 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ...
0 6 15-21 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ...

manual run: $0 options:
    -m = months
    -t = testing: saves files locally, doesn't email
    -q = query output
    --cumulative = all requests since 1st of month of ref date
    --console = verbose log to console
    --verbose = verbose log to file (overrides console)

maps hilis4 lab tests to an NGIS test code using hilis4.ngis_lab_test table:
if lab_test_id entry(s) exist in ngis_lab_test table:
* if single row then use ngis_test_code value (diagnosis doesn't matter so request
    doesn't need to be reported)
* if lab_test has multiple ngis_test_codes, the diagnosis clinical indication code
    (ngis_diagnosis.ngis_indication_id) is used to determine which (if any)
    ngis_test_code to use, where the diagnosis clinical indication must match
    one of the available ngis test id's eg HTS myeloid test has matching codes
    for a diagnosis of MDS, MPN, etc but not for CML
* if NGIS test code cannot be found on diagnosis, a loose matching on presentation
    is attempted, including previous screen terms used, eg 'MDS' matches
    'Suspected MDS', 'Follow-up MDS'
* last resort is to force NGIS test code onto some test names (eg HTS myeloid)

* recipient does not want technology (FISH, PCR, HTS, etc) so ETV6/RUNX1 (FISH)
    & ETV6-RUNX1 (PCR) potentially ambiguous

* molecular tests are reassembled into panels (if panel has NGIS code)
* FISH panels without NGIS codes are expanded to individual tests

TODO: for molecular panels when 1 or more tests set complete in different months,
    panel not recognised and separate test codes returned eg 20536/19

complications (see geneq_data_extract.log):
* hilis4 lab-tests/panels without ngis test id's - require default codes
* lab tests/panels without a clinical indication mapped to diagnosis or screen
* diagnostic terms without clinical indication (eg 'see comments', 'lab tests only', etc)
* requests not reported when data collected
=cut
#-------------------------------------------------------------------------------

#===============================================================================
my @data_recipients = qw( paul.mcintosh john.fraser raj );
my @log_recipients  = qw( turner talley raj );
#===============================================================================

my $query_output = 0; # --query|q - output sql queries to console
my $log_console  = 0; # --console - verbose log to console
my $verbose_log  = 0; # --verbose - verbose log to file (overrides --console)
my $cumulative   = 0; # --cumulative - all requests since 1st of month of ref_date
my $duration     = 1; # --month|m - default unless specified in command-line opts
my $testing      = 0; # --testing|t - saves file locally, doesn't email

use Getopt::Long;
GetOptions (
    "months|m=i" => \$duration,       # int
    "testing|t"  => \$testing,        # flag
    "query|q"    => \$query_output,   # flag
    "console"    => \$log_console,    # flag
    "verbose"    => \$verbose_log,    # flag
    "cumulative" => \$cumulative,     # flag
); # warn $duration; warn $cumulative; exit;

use strict;
use warnings;
use feature 'say';

my $JUST_TESTING = $testing || 0;

use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;

use IO::All;
use Data::Dumper;
use Clone 'clone';
use Data::Printer;
use List::Compare;
use Log::Any qw($log);
use Log::Any::Adapter;
use SQL::Abstract::More;
use List::Util qw(maxstr);
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);

use lib $Bin . '/../../../lib';
use LIMS::Local::ScriptHelpers;

use lib '/home/raj/perl-lib';
use Local::WriteExcel;
use Local::DB;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

my $config = $tools->config();
my $today  = $tools->time_now;
my $dbix   = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names (now standardised)

$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)

my %STASH; # for logging/debugging

#-------------------------------------------------------------------------------
my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date;
my $filename = sprintf 'GeNEQ_HMDS_data_%s.xls', $ref_date->strftime('%b_%Y'); # warn $filename;
my $subject  = sprintf 'GeNEQ HMDS data %s', $ref_date->strftime('%b %Y');

my $logfile  = sprintf '%s/geneq_data_extract_%s.log',
    $Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y'); # warn $logfile;


# for verbose logging to console (can be set or reset later):
Log::Any::Adapter->set('Stdout') if $log_console;
# for verbose logging to logfile (can be set or reset later):
Log::Any::Adapter->set(File => $logfile) if $verbose_log;

# check cytogenetics
my @lab_sections = (
    'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing',
    'Multiplex Ligation-dependent Probe Amplification'
);
# skip these lab-tests:
my $excluded_lab_tests = $tools->get_yaml_file('ngis_excluded_lab_tests');
# p $excluded_lab_tests; exit;

{ # may need to exclude D/RNA extractions in future:
    my @extractions = ( 'DNA extraction', 'RNA extraction', 'CD138+ DNA' );
    # push @{ $excluded_lab_tests->{Molecular} }, @extractions;
}
# exclude all quantification & store D/RNA tests:
my $excluded_test_name_re = 'quantification|store_[dr]na';

my @excluded_parent_codes = (
	'RWM', 	  # Cardiff
	'RVA', 	  # Carmarthen
	'RT8', 	  # Conwy
	'RVF', 	  # Gwent
	'RT9', 	  # NE Wales
	'RT7', 	  # NW Wales
	'RVC', 	  # Swansea
	'RQF', 	  # Velidre, Wales
	'8EQ15',  # Drs Lab
	'MSDMDH', # Mater Dei
	'X99999', 'V81999' # unknowns
);

# look for loose match with presentation term:
my %presentation_types = (
    AML       => 'M80',
    CLL       => 'M94',
    CML       => 'M84',
    MDS       => 'M82',
    MPN       => 'M85',
    PNH       => 'M82',
    Hodgkin   => 'M95',
    'B-ALL'   => 'M91',
    myeloma   => 'M92', # case-sensitive
    Aplastic  => 'M82',
    Cytopenia => 'M82', # MDS
    # forcing lymphomas to clinical indication code for high-grade lymphoma (M99):
    # TODO: maybe better to force to prehistoric term B-NHL ??
    'Probable B-LPD'     => 'M99',
    'Suspected lymphoma' => 'M99',
    'Suspected B-cell lymphoma' => 'M99',
);

# force specific NGIS test codes (last resort):
my %forced_ngis_codes = (
    'Cytocell CCND1/IGH' => 'M102.2', # MCL
# for confirmation:
#    'Cytocell MECOM'     => 'XTR26.a', # rare-disease temp code (breaks table joins if used in db)
    'BCR-ABL-tests' => 'M84.1', # CML - regex match in try_presentation()
    'RUNX1-RUNX1T1' => 'M80.7', # AML
    'CBFbeta MYH11' => 'M80.7', # AML
    'HTS myeloid'   => 'M82.1', # MDS
    'CytoSNP'       => 'M82.2', # MDS
);

# molecular lab-tests done on RNA (the rest done on DNA):
my $rna_tests = do {
    my @skipped_tests = ( 'FLT3 ITD', 'FLT3 TKD', 'NPM1' ); # can be done on both DNA & RNA
    get_rna_tests(\@skipped_tests);
}; # p $rna_tests; exit;

# get all panel tests (omitting excluded tests) for FISH & Moelcular sections:
my $panel_lab_test = get_panel_lab_tests(['FISH','Molecular']); # p $panel_lab_test; exit;

# get lab-tests auto-requested at registration (discontinued):
# my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests;

# get list of section lab test id's (for inter-converting tests & panels):
my $section_lab_test_id = get_section_lab_tests(['FISH', 'Molecular']); # p $section_lab_test_id;

# get mapped HILIS4 lab_test.id => NGIS test code:
my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests;
#-------------------------------------------------------------------------------

my ($sql, @bind) = _get_main_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];

my $query = $dbix->query( $sql, @bind );

# get cols from query, except 'private' ones used for evaluations (eg _datetime_screened):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');

my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);

runtimer('commencing data processing');

# get request_ids and DNA/RNA extraction dates into hash map:
my $extraction_dates = do {
    my @request_ids = map $_->{internal_request_id}, @data;
    get_extraction_dates_for_requests(\@request_ids);
}; # io('extraction_dates.txt')->appendln($_) for keys %$extraction_dates;

# pre-process data to reconstitute (some) molecular panels:
runtimer('reconstituting molecular panels');
@data = reconstitute_molecular_panels(\@data); # p @data;

# pre-process data to split (some) FISH panels (too complex to combine with above):
runtimer('splitting FISH panels');
@data = split_fish_panels(\@data); # p @data;
runtimer('finished pre-processing data');

# process @data:
for my $ref (@data) {
    my $data = process_data($ref); # returns arrayref
	$xl->write_row($data);
}
runtimer('completed data processing');

if ($JUST_TESTING) {
    $xl->save(join '/', $Bin, $filename);
}
else {
    my %mail = (
        attachment => $xl->data,
        filename   => $filename,
        subject    => $subject,
        config     => $config,
    );
    $tools->send_mail(\%mail, \@data_recipients);
}

# log all misfits:
_do_log();

#===============================================================================

sub runtimer {
    return unless $JUST_TESTING;
    say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday];
}

sub debug {
    return unless $JUST_TESTING;
    say $_[0];
}

sub process_data {
    my $ref = shift; # href

    # NGIS test code:
    $ref->{test_id} = get_ngis_test_code($ref);

    # determine date lab-test requested (replaced by datetime_order_received):
    # $ref->{datetime_test_requested} = get_test_request_date($ref); # will need to be revived

    # set datetime_testing_complete to test completion datetime:
    $ref->{datetime_testing_complete} = $ref->{_datetime_test_completed};

    { # set datetime_reporting_complete to later of testing_complete or authorisation:
        if ( $ref->{_datetime_authorised} ) {
            my @dates = @{$ref}{ qw/_datetime_authorised _datetime_test_completed/ };
            # dates in yyyy-mm-dd format can be handled as strings for determination
            # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
            $ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util
        }
        else { # not authorised, set to _datetime_test_completed:
            $ref->{datetime_reporting_complete} = $ref->{_datetime_test_completed}
        }
    }
=begin # datetime_processing_complete now set to D/RNA extraction date
    # set datetime_processing_complete to later of authorisation date or test
    # completion date; value will be null if request not yet authorised:
        # p $ref->{sample_processed_date}; p $ref->{_datetime_authorised};
    if ( $ref->{_datetime_authorised} ) {
        my @dates = @{$ref}{ qw/_datetime_authorised datetime_testing_complete/ };
        # dates in yyyy-mm-dd format can be handled as strings for determination
        # of chronology using List::Util::maxstr or could do $dateA gt $dateB:
        $ref->{datetime_processing_complete} = maxstr(@dates); # List::Util
    } # else sample_completed_date remains null
=cut
    $ref->{datetime_processing_complete} = get_processing_date($ref);

    # new template wants some cols temporarily NULL (commented out in query now):
    my @null_fields = qw//;
    $ref->{$_} = undef for @null_fields;

    return [ @{$ref}{@headers} ];
}

sub _main_query_cols {
    my @cols = (
        'CONCAT_WS("/",  r.request_number, r.year - 2000)|local_sample_id',
#        'NULL|local_sample_id_sub',
#        'NULL|local_dna_number',
#        'NULL|local_dna_number_sub',
        'p.id|internal_patient_id',
        'r.id|internal_request_id',
        q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|testing_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!,
#        'NULL|ped_number',
#        'p.first_name|patient_first_name',
#        'p.last_name|patient_last_name',
#        'p.dob|patient_dob',
        'p.gender|patient_sex',
#        'p.nhs_number',
#        'pd.address|patient_address',
#        'NULL|patient_district_of_residence',
        'NULL|patient_city',
#        'pd.post_code|patient_postcode',
#        'NULL|patient_country',
        'ref.national_code|referrer_ods_code',
#        'NULL|referrer_email',
#        'NULL|referrer_telephone',
#        'NULL|referrer_title',
#        'NULL|referrer_first_name',
#        'ref.name|referrer_last_name',
        'hd.display_name|referrer_specialty',
#        'rs.display_name|referring_facility_name',
#        'NULL|referrer_address',
#        'NULL|referrer_city',
#        'NULL|referrer_district',
#        'NULL|referrer_postcode',
#        'NULL|referrer_county',
        'rs.organisation_code|referring_facility_ods_code',
#        'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b
#        'NULL|other_unit_number',
#        'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b
        q!"Routine"|test_priority!,
        'NULL|test_id', # NGIS test id (eg M80.6)
        'lt.field_label|test_name',
#        'ls.section_name|lab_section',
        'NULL|ngis_id', # NGIS order number ?
#        'NULL|datetime_test_requested', # replaced by datetime_order_received
#        'lt.test_type|request_comments', # panel or test
        q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ')
            AS sample_type_received!,
        'rsd.specimen_date|datetime_sample_collected',  # datetime
        'r.created_at|datetime_sample_received',        # datetime
        'r.created_at|datetime_order_received',         # datetime
        'NULL|datetime_processing_complete',            # set later
        'NULL|datetime_testing_complete',               # set later
        'NULL|datetime_reporting_complete',             # set later
        'rsd.specimen_quality|sample_condition',
#        q!MAX( CASE WHEN ao.option_name = 'doi'
#            THEN 'yes' ELSE 'no' END ) AS infection_risk!,
#        'NULL|sample_type_dispatched',
#        'IF( rst.volume, "ul", NULL )|sample_units',
#        'rst.volume|sample_amount',
#        q!IF( rst.concentration < 9999, rst.concentration, NULL )
#            AS dna_concentration!,
#        'NULL|sample_date_sent',
#        'rst.vialId|sample_tube_id',
#        'sr.plateId|sample_rack_id',
#        'NULL|sample_courier',
#        'NULL|sample_courier_ref',

#        'NULL|sample_processed_date',  # renamed datetime_processing_complete
#        'NULL|sample_completed_date',  # renamed datetime_testing_complete

    # fields used for data processing only (underscored for omission in @headers):
        'lt.id|_lab_test_id',
        'lt.test_type|_test_type',
        'lt.test_name|_test_name',
        'ls.section_name|_section_name',
        'd.name|_diagnosis',
        'nci.code|_clinical_indication_code',
        's2.description|_presentation',
        'rrv.diagnosis_id|_diagnosis_id',
		# q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END )
        #    AS _datetime_screened!, # not used
		# q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END )
        #    AS _datetime_reported!, # not used
		q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END )
            AS _datetime_authorised!,
        'ts.time|_datetime_test_completed', # date lab-test set to complete
    );
    return wantarray ? @cols : \@cols;
}

sub _get_main_query {
    my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;
    my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month

    my %date_restriction = $cumulative # if --cumulative Getopt:
        ? ( '>=', $begin_date->ymd ) # all since ref_date
        : ( -between => [ # first & last day of ref_date month:
                $begin_date->ymd, # 1st day
                $begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day
            ] );
    my $excluded_lab_test_ids = get_excluded_lab_test_ids();

    my $cols = _main_query_cols(); # these keep changing
    my @rels = (                                'requests|r'                   ,
        q{r.patient_case_id=pc.id}           => 'patient_case|pc'              ,
        q{pc.patient_id=p.id}                => 'patients|p'                   ,
        q{pc.referral_source_id=rs.id}       => 'referral_sources|rs'          ,
        q{rs.parent_organisation_id=po.id}   => 'parent_organisations|po'      ,
        q{r.referrer_department_id=rd.id}    => 'referrer_department|rd'       ,
        q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd'      ,
        q{rd.referrer_id=ref.id}		     => 'referrers|ref'                ,
        q{rsd.request_id=r.id}               => 'request_specimen_detail|rsd'  ,
        q{rsp.request_id=r.id}               => 'request_specimen|rsp'         ,
        q{rsp.specimen_id=s.id}              => 'specimens|s'                  ,
        q{ts.request_id=r.id}                => 'request_lab_test_status|ts'   ,
        q{ts.status_option_id=so.id}         => 'lab_test_status_options|so'   ,
        q{ts.lab_test_id=lt.id}              => 'lab_tests|lt'                 ,
        q{lt.lab_section_id=ls.id}           => 'lab_sections|ls'              ,
        q{rsv.request_id=r.id}               => 'request_status_view|rsv'      ,
        q{ris.request_id=r.id}               => 'request_initial_screen|ris'   ,
        q{ris.screen_id=s2.id}               => 'screens|s2'                   ,
      # left joins:
        q{=>rrv.request_id=r.id}             => 'request_report_view|rrv'      ,
        q{=>rrv.diagnosis_id=d.id}           => 'diagnoses|d'                  ,
        q{=>nd.diagnosis_id=d.id}            => 'ngis_diagnosis|nd'            ,
        q{=>nd.ngis_indication_id=nci.id}    => 'ngis_clinical_indications|nci',
        q{=>ro.request_id=r.id}              => 'request_option|ro'            ,
        q{=>ro.option_id=ao.id}              => 'additional_options|ao'        ,
        q{=>pd.patient_id=p.id}              => 'patient_demographics|pd'      ,
        q{=>rst.request_id=r.id}             => 'request_storage|rst'          ,
        q{=>rst.rack_id=sr.id}               => 'storage_racks|sr'             ,
        q{=>rt.request_id=r.id}              => 'request_trial|rt'             ,
    );
    my %where = (
        -and => [ # repeated elements (eg parent_code), so requires arrayref
            'rt.request_id'        => undef,      # not in clinical trial
            'so.description'       => 'complete', # lab-test status
            'ls.section_name'      => { -in => \@lab_sections },
            'rs.organisation_code' => { -not_rlike => '^NT' },    # private hospital
            'po.parent_code'       => { -not_rlike => '^(S|Z)' }, # scotland/ireland
            'po.parent_code'       => { -not_in => \@excluded_parent_codes },
            'DATE(ts.time)'        => \%date_restriction,
            'lt.test_name'         => { -not_rlike => $excluded_test_name_re },
            'lt.id'                => { -not_in    => $excluded_lab_test_ids },
            # to restrict to specific request id's:
            # 'r.id'               => { -in => [395682] },
        ],
    );
    my @args = (
		-columns  => $cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
        -group_by => [ 'lt.id', 'r.id' ],
        -having   => { $is_private => undef }, # not private patient
        -order_by => 'r.id',
        #-limit => 100,
        #-offset => 100,
    ); # p @args;
    # need to supply our own injection_guard to override built-in sql injection
    # attack prevention which detects ';' in GROUP_CONCAT, but we don't need one
    # here as we have no user input:
    my $sqla = SQL::Abstract::More->new( injection_guard => qr/^$/ );
    my ($sql, @bind) = $sqla->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return ($sql, @bind);
}

sub get_excluded_lab_test_ids {
    my @test_ids;

    my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
    my %args = (
        -columns  => 'lt.id',
        -from     => [ -join => @rels ],
    );
    while ( my($section, $ref) = each %{$excluded_lab_tests} ) {
        $args{'-where'} = {
            'ls.section_name' => $section,
            'lt.field_label'  => { -in => $ref },
        };
        my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
            $dbix->dump_query($sql, @bind) if $query_output; # exit;
        my @ids = $dbix->query($sql, @bind)->column; # ref
        push @test_ids, @ids;
    } # p @test_ids;
    return wantarray ? @test_ids : \@test_ids;
}

sub get_extraction_dates_for_requests {
    my $request_ids = shift;

    my @cols = (
        'ts.request_id',
        q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/,
        'ts.time',
    );
    my @rels = (                       'request_lab_test_status|ts',
        'ts.lab_test_id=lt.id'      => 'lab_tests|lt'              ,
        'ts.status_option_id=so.id' => 'lab_test_status_options|so',
    );
    my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna );
    my %where = (
        'so.description' => 'complete',
        'lt.test_name'   => { -in => \@extraction_tests },
        'ts.request_id'  => { -in => $request_ids },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $query = $dbix->query( $sql, @bind );

    my %h;
    while ( my $ref = $query->hash ) {
        my $request_id = $ref->{request_id};
        my $datetime   = $ref->{time};
        my $type       = $ref->{type};

        $h{$request_id}{$type} = $datetime;
    }
    return \%h;
}

sub get_processing_date {
    my $ref = shift;
    # FISH tests not done on D/RNA extraction so can't get a processing date:
    return if $ref->{_section_name} eq 'FISH';

    my $test_name = $ref->{_test_name}; # p $test_name;
    my $lab_num   = $ref->{local_sample_id};
    my $req_id    = $ref->{internal_request_id}; # p $req_id;

    # get extraction dates for DNA and/or RNA:
    my $extraction = $extraction_dates->{$req_id}; # p $extraction;
    debug("no extraction for $lab_num $test_name") and return
        unless $extraction;

    # get expected material, RNA if test_name in %rna_tests, otherwise assume
    # DNA, except a small minority of tests done on CD138-selected DNA (see below):
    my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA';
    # get extraction date for test material:
    my $date = $extraction->{$material};
    # some tests (eg MLPA myeloma) are done on CD138-selected DNA -
    # key = CD138DNA in %extraction_dates:
    $date ||= $extraction->{CD138DNA}; # or give up - no extraction date available:
    debug("no extraction date for $lab_num $test_name $material") and return
        unless $date;
    return $date;
}

=begin # not used - replaced by datetime_order_received
sub get_test_request_date {
    my $ref = shift; # p $ref;

    my $request_id = $ref->{internal_request_id};
    my $registered = $ref->{datetime_sample_received};
    my $test_name  = $ref->{test_name};
    my $screened   = $ref->{_datetime_screened};

    # use manual request date if exists, or auto-request date, or date of screening:
    my $manual_request_date = get_manual_test_request($request_id, $test_name);
    # auto-requested by sample-type, therefore date = registration:
    my $auto_request_date = $registration_lab_tests->{$test_name}
        ? $registered : 0; # zero OK as will be tested for truth below
        # p [$manual_request_date, $auto_request_date, $screened];

    return $manual_request_date || $auto_request_date || $screened;
}
=cut

=begin # not used:
sub get_registration_lab_test_requests {
    my @cols = ( 'DISTINCT t2.field_label', 1 );
    my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2');
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query($sql, @bind)->map;
}
=cut

sub get_rna_tests {
    my $skipped_tests = shift;
    my @rels = (                       'linked_lab_test|llt' ,
        'llt.parent_test_id=lt1.id' => 'lab_tests|lt1'       ,
        'llt.linked_test_id=lt2.id' => 'lab_tests|lt2'       ,
    );
    my @args = (
		-columns  => [ 'lt1.test_name', 1 ],
		-from     => [ -join => @rels ],
		-where    => {
            'lt1.field_label' => { -not_in => $skipped_tests },
            'lt2.test_name'   => 'rna_extraction',
        },
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    my $map = $dbix->query( $sql, @bind )->map;
    # add rna_extraction in case extraction tests are required in data return:
    $map->{rna_extraction} = 1;
    return $map;
}

=begin # called from get_test_request_date(), not used
sub get_manual_test_request {
    my ($request_id, $test_name) = @_;

    my @args = (
		-columns  => [ 'time' ],
		-from     => [ 'request_lab_test_history' ],
        -where    => {
            request_id => $request_id,
            action => {
                -rlike => '^(auto-)?requested( linked test)? ' . $test_name
            },
        },
        -order_by => [ '-time' ], # ORDER BY time DESC
        -limit    => 1,           # returns most recent only if >1
    ); # p @args;

    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query($sql, @bind)->value;
}
=cut

sub get_ngis_test_code {
    my $ref = shift; # p $ref if $ref->{test_name} =~ /Burkitt mutation/;

    my $presentation = $ref->{_presentation};
    my $diagnosis_id = $ref->{_diagnosis_id};
    my $lab_test_id  = $ref->{_lab_test_id};
    my $lab_section  = $ref->{_section_name};
    my $test_name    = $ref->{test_name};       # say $test_name;
    my $test_type    = $ref->{_test_type};
    my $diagnosis    = $ref->{_diagnosis} || '';
    my $lab_num      = $ref->{local_sample_id};

    my $presentation_diagnosis = join '::', $presentation,
        $diagnosis || '[NOT REPORTED]'; # p $presentation_diagnosis;

  # get NGIS code(s) for this lab-test:
    my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref
    unless ($ngis_test_code) {
        my $title =join ' :: ', $test_name, $test_type, $lab_section;
        $STASH{no_ngis_code}{$title}++;

        # debug("$lab_num: no NGIS id for $test_name");
        return $test_name !~ /[DR]NA extraction|CD138\+ DNA/ # return undef if extraction
            ? $presentation_diagnosis
            : undef;
    }
  # return NGIS test code if it's the only one (even if not supported clinical
  # indication; TODO: this might not be suitable, might need to consider diagnosis):
    return $ngis_test_code->[0] if scalar @$ngis_test_code == 1;

    my $all_test_codes = join ', ', @$ngis_test_code; # for log

  # now have multiple NGIS id's for lab-test
    # common message for debugging:
    my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!;

    # vars for try_presentation():
    my %vars_for_presentation = ( data => $ref, ngis_code => $ngis_test_code );

   # now need to examine diagnosis:
    if (! $diagnosis_id) {
        if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
            # debug("found $ngis_code from $presentation");
            return $ngis_code;
        }
        # io($log)->appendln("$msg and request is not reported");
        push @{ $STASH{unreported} }, [ $lab_num, $test_name, $presentation ];
        return $presentation_diagnosis;
    }
  # get clinical indication for diagnosis (eg AML NOS = M80)
    my $indication = $ref->{_clinical_indication_code}; # p $indication;
    if (! $indication) { # debug('no clinical indication');
        if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
            # debug("found $ngis_code from $presentation");
            return $ngis_code;
        }
        #io($log)->appendln("$msg and '$diagnosis' has no clinical indication id"
        #    . " [presentation: $presentation]");
        push @{ $STASH{no_clinical_indication} },
            [ $lab_num, $test_name, $presentation, $diagnosis ];
        return $presentation_diagnosis;
    } # p $indication; p $ngis_ids;

  # return NGIS code from list if has a matching clinical indication:
    for my $ngis_code (@$ngis_test_code) { # warn $ngis_code;
        # debug("$lab_num: $indication -> $ngis_code") if $ngis_code =~ /^$indication/;
        return $ngis_code if $ngis_code =~ /^$indication/;
    }
  # last resort is to try to find NGIS code from presentation:
    if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
        # debug("found $ngis_code from $presentation");
        return $ngis_code;
    }

  # can't find an NGIS code for lab-test from presentation or diagnosis:
    #io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] "
    #    . "did not match any available NGIS test codes [presentation: $presentation]");
    # debug("no NGIS id available for $test_name");
    push @{ $STASH{no_available_ngis_code} }, [ $lab_num, $test_name,
        $all_test_codes, $diagnosis, $indication, $presentation ];
    # return undef;
    return $presentation_diagnosis;
}

sub try_presentation { # try to look for a loose match with presentation term:
    my $args = shift;

    my $presentation = $args->{data}->{_presentation};
    my $request_id   = $args->{data}->{internal_request_id};
    my $ngis_codes   = $args->{ngis_code};
    my $test_name    = $args->{data}->{test_name};
       # p $presentation; p $ngis_codes; p $test_name;

    # get any previous screening terms from history file:
    my @all_screens = get_request_history($request_id);
    push @all_screens, $presentation; # p @all_screens;

    for my $screen (@all_screens) {
        for my $type ( keys %presentation_types ) { # p $type; # eg AML, MDS
            if ( $screen =~ /$type/ ) { # eg Suspected MDS =~ /MDS/
                # debug("$screen matches $type");
                for my $code (@$ngis_codes) { # eg M84.2
                        # p $code; p $presentation_types{$type};
                    if ( $code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/
                        # debug("$code matches $presentation_types{$type}");
                        return $code;
                    }
                }
            }
        }
    }
    { # if we get this far, no other match possible, force NGIS code if test
        # configured in %forced_ngis_codes:
        for my $test ( keys %forced_ngis_codes ) {
            return $forced_ngis_codes{$test} if $test_name eq $test;
        }
        # BCR-ABL p210 RQ or BCR-ABL multiplex:
        return $forced_ngis_codes{'BCR-ABL-tests'} if $test_name =~ /BCR-ABL/;
    }
    return undef;
}

sub get_request_history {
    my $request_id = shift; # p $request_id;

    my $query = $dbix->select('request_history', 'action',
        {
            request_id => $request_id,
            action     => { -like => 'updated initial screen entry %' }
        }
    );
    my @screens;
    while ( my $action = $query->value ) { # p $action;
        if ( $action =~ /updated initial screen entry (.*)/ ) {
            push @screens, $1;
        }
    }
    return wantarray ? @screens : \@screens;
}

sub get_ngis_lab_tests {
    my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]);
    my %h;
    while ( my $ref = $q->array ) { # p $ref;
        my $lab_test_id = $ref->[0];
        push @{ $h{$lab_test_id} }, $ref->[1];
    }
    return \%h;
}

sub split_fish_panels { # only some FISH panels (those without an NGIS map):
    my $data = shift; # p $data;

    my $required_fish_panels = get_fish_panel_names_without_ngis_codes(); # p $required_fish_panels;
    my @fish_panel_lab_tests = @{ $panel_lab_test->{FISH} };

    my %non_ngis_fish_panels;
    # skip panels from $fish_panel_lab_test with NGIS codes:
    for my $ref (@fish_panel_lab_tests) { # AoA
        my $panel_name = $ref->[0]; # p $panel_name;
        next unless grep { $panel_name eq $_ } @$required_fish_panels;
        # debug("$panel_name in required_fish_panels");
        $non_ngis_fish_panels{$panel_name} = $ref->[1];
    } # p %non_ngis_fish_panels;

    # coderef for substituting test names (eg  "DLBCL + FL" panel -> "DAKO MYC")
    my $substitute_values = sub {
        my ($row, $new_test_name) = @_;
        $row->{test_name} = $new_test_name;
        # also need to change lab_test_id for finding NGIS test code:
        $row->{_lab_test_id} = $section_lab_test_id->{FISH}->{$new_test_name};
        $row->{_test_type}   = 'test'; # for log in case it's unmapped
    };

    ROW:
    for my $row (@$data) {
        my $test_name = $row->{test_name};
        my $lab_num   = $row->{local_sample_id};
        # if test name matches one of FISH panels without an NGIS code:
        if ( grep $test_name eq $_, keys %non_ngis_fish_panels ) {
            # get FISH panel test names:
            my @panel_tests = @{ $non_ngis_fish_panels{$test_name} };
            { # change test_name to 1st element of $panel_tests
                my $new_test_name = shift @panel_tests;
                debug("$lab_num: changing $test_name to $new_test_name");
                &$substitute_values($row,$new_test_name);
            }
            # now clone $row for each remaining element in @panel_tests:
            TEST:
            for my $t (@panel_tests) {
                my $new = clone $row;
                # repeat above substitutions:
                debug("$lab_num: adding new row with test-name $t");
                &$substitute_values($new,$t);
                push @$data, $new;
            }
        }
    }
    # re-sort by lab_number (@data now out of sequence after pushing new rows)
    my @new = sort { $a->{internal_request_id} <=> $b->{internal_request_id} }
        @data;
    return wantarray ? @new : \@new;
}

sub get_fish_panel_names_without_ngis_codes {
    my @rels = (                       'lab_tests|lt',
        'lt.lab_section_id=ls.id'   => 'lab_sections|ls',
        '=>nlt.lab_test_id=lt.id'   => 'ngis_lab_test|nlt'
    );
    my %where = (
        'ls.section_name' => 'FISH',
	    'lt.test_type'    => 'panel',
        'lt.field_label'  => { -not_in => $excluded_lab_tests->{FISH} },
        'nlt.lab_test_id' => undef,
    );
    my @args = (
		-columns  => 'lt.field_label',
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return $dbix->query( $sql, @bind )->column;
}

sub reconstitute_molecular_panels {
    my $data = shift; # p $data;

    # new data file:
    my @new;

    # keep track of request_id:
    my $last_request_id = 0;
    # holder for molecular tests data:
    my @molecular_tests = (); # will be destroyed and recreated after each request

    ROW:
    for my $row (@$data) {
        my $lab_section = $row->{_section_name};
        my $request_id  = $row->{internal_request_id};
        # if new request, handle molecular tests data, if any:
        if ( @molecular_tests && $request_id != $last_request_id ) {
            $log->info("molecular tests for $last_request_id:",
                { set => [ map $_->{test_name}, @molecular_tests ] } );
            my @rows = molecular_tests_to_panels(\@molecular_tests); # p @rows;
            push @new, @rows;
            @molecular_tests = (); # reset for next request
        }
        push @new, $row and next ROW if $lab_section ne 'Molecular';
        # have molecular data row:
        push @molecular_tests, $row;
        # set last_request_id to current request_id:
        $last_request_id = $request_id;
    } # p %tests;
    return wantarray ? @new : \@new;
}

sub molecular_tests_to_panels {
    my $ref = shift; # AoH's

    my @molecular_panel_lab_tests = @{ $panel_lab_test->{Molecular} };
    my @all_molecular_tests       = map $_->{test_name}, @$ref;
        # say 'test_names:'; p @all_molecular_tests;

    # panels ordered in size of array, largest 1st to prevent matches with
    # smaller panels having common members (eg AML 60+ & AML under-60):
    PANEL:
    for my $t (@molecular_panel_lab_tests) { # eg AML 60+ => [NPM1, FLT3 ITD]
        my ($panel_name, $panel_tests) = @$t; # p @$panel_tests;
        # don't need ordered list, use -u flag for speed boost:
        my $lc = List::Compare->new('-u', $panel_tests, \@all_molecular_tests);
        # if this panel list is a subset of all_molecular_tests:
        if ( $lc->is_LsubsetR ) { # is left-a-subset-of-right ?
            $log->info("$panel_name panel found in molecular set:",
                { panel_tests => $panel_tests } );

            my @set = @$panel_tests; # localise for element substitutions
            my %seen = (); # reset test name stash for this panel

            # remove 1st element in @set into a variable:
            my $first_element_of_set = shift @set;
                # debug("first_element_of_set: $first_element_of_set");

            # for test name of 1st entry in $ref matching 1st element of @set,
            # change it to the matched panel_name & flag the rest for deletion:
            DATA:
            for my $d (@$ref) { # p $d; # data href
                next DATA if $d->{DELETE_ME}; # already flagged for deletion
                my $test_name = $d->{test_name}; # p $test_name;
                my $lab_num   = $d->{local_sample_id};
                    # debug("$lab_num: testing whether $test_name eq $first_element_of_set");
                if ( $test_name eq $first_element_of_set ) {
                    $log->info("changing $test_name to $panel_name");
                     debug("$lab_num: changing $test_name to $panel_name");
                    $d->{test_name} = $panel_name;
                    $d->{_test_type} = 'panel'; # for log in case it's unmapped
                    $d->{_lab_test_id} #  for finding NGIS test code:
                        = $section_lab_test_id->{Molecular}->{$panel_name};
                 }
                elsif ( grep $test_name eq $_, @set ) {
                    # set a flag for later (only once per test in case it's
                    # re-requested or is part of another panel):
                    next DATA if $seen{$test_name}++;
                    $log->info("$test_name is in $panel_name set, flagging for deletion");
                    # debug("$test_name is in $panel_name set, flagging for deletion")
                    $d->{DELETE_ME}++;
                }
            }
        }
    } # p $ref;
    $log->info('=' x 30);
    # return original hashrefs not flagged for deletion:
    my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data;
    return wantarray ? @data : \@data;
}

sub get_section_lab_tests {
    my $lab_sections = shift; # aref

    my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
    my %args = (
		-columns  => [ qw/lt.field_label lt.id/ ],
		-from     => [ -join => @rels ],
		-where    => { 'ls.section_name' => undef }, # defined in block
    ); # p @args;
    my %h;
    for my $section (@$lab_sections) {
        $args{'-where'}{'ls.section_name'} = $section;
        my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
             $dbix->dump_query($sql, @bind) if $query_output; # exit;
        $h{$section} = $dbix->query( $sql, @bind )->map;
    } # p \%h;
    return \%h;
}

sub get_panel_lab_tests {
    my $lab_section = shift; # aref

    my @cols = (
    	'lt1.field_label|panel',
        'lt2.field_label|test',
    );
    my @rels = (                         'panel_lab_test|plt'  ,
        'plt.panel_test_id=lt1.id'    => 'lab_tests|lt1'       ,
        'plt.lab_test_id=lt2.id'      => 'lab_tests|lt2'       ,
        'lt1.lab_section_id=ls.id'    => 'lab_sections|ls'     ,
    );
    my %panel_tests;
    for my $section (@$lab_section) {
        # need to *locally* exclude DNA extraction, if not already excluded:
        my @local_excluded_tests = @{ $excluded_lab_tests->{$section} };
        push @local_excluded_tests, 'DNA extraction'
            unless grep { $_ eq 'DNA extraction' } @local_excluded_tests;

        my %where = (
            'lt1.field_label' => { -not_in => \@local_excluded_tests },
            'lt2.field_label' => { -not_in => \@local_excluded_tests },
            'ls.section_name' => $section,
            'lt1.is_active'   => 'yes',
        );
        my @args = (
            -columns  => \@cols,
            -from     => [ -join => @rels ],
            -where    => \%where,
        ); # p @args;
        my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
             $dbix->dump_query($sql, @bind) if $query_output; # exit;
        my $query = $dbix->query( $sql, @bind );

        my %h;
        while ( my $ref = $query->hash ) {
            my $panel = $ref->{panel};
            push @{ $h{$panel } }, $ref->{test};
        } # p %h;
        # re-arrange in order of size of panel, largest 1st:
        my @data = map [ +($_ => $h{$_}) ],
            sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data;
        $panel_tests{$section} = \@data;
    }
    return \%panel_tests;
}

sub test_name_order { # p $a; p $b;
    my ($req_num_a, $yr_a) = split '/', $a->[0];
    my ($req_num_b, $yr_b) = split '/', $b->[0];

    my $test_name_a = $a->[1];
    my $test_name_b = $b->[1];

    return $test_name_a cmp $test_name_b
        || $yr_a <=> $yr_b || $req_num_a <=> $req_num_b;
}

sub _do_log { # p %STASH;
    my $no_ngis_code = $STASH{no_ngis_code};
    my $unreported   = $STASH{unreported};
    my $no_clinical_indication = $STASH{no_clinical_indication};
    my $no_available_ngis_code = $STASH{no_available_ngis_code};

    my $filename = sprintf '%s/geneq_data_extract_log_%s.xlsx',
        $Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y');
    my $xl = Local::WriteExcel->new( filename => $filename ); # p $xl;

    if ($no_clinical_indication) {
        $xl->worksheet_name('no clinical indication');
        my @cols = qw( lab_num test_name presentation diagnosis );
        $xl->write_bold_row(\@cols);
        $xl->write_row($_) for @$no_clinical_indication;
    }
    if ($no_available_ngis_code) {
        $xl->worksheet_name('no available ngis test-code');
        my @cols = qw( lab_num test_name ngis_codes diagnosis
            clinical_indication presentation );
        $xl->write_bold_row(\@cols);
        $xl->write_row($_) for @$no_available_ngis_code;
    }
    if ($unreported) {
        $xl->worksheet_name('unreported');
        $xl->write_bold_row([ qw/lab_num test_name presentation/] );
        $xl->write_row($_) for @$unreported;
    }
    if ($no_ngis_code) { # p $no_ngis_code;
        $xl->worksheet_name('no ngis test-code');
        $xl->write_bold_row([ qw/test_name test_type lab_section count/ ] );
        $xl->write_row([ (split ' :: ', $_ ), $no_ngis_code->{$_} ])
            for sort keys %{ $no_ngis_code };
    }

    if ($JUST_TESTING) {
        $xl->save();
    }
    else {
        my $subject = 'GeNEQ data extraction log '
            . $ref_date->strftime('%b %Y'); # p $subject;
        # capture filename from its full path:
        $filename =~ /(geneq_data_extract_log_.*)/; # p $1;

        my %mail = (
            attachment => $xl->data,
            filename   => $1, # captured from above
            subject    => $subject,
            config     => $config,
        );
        $tools->send_mail(\%mail, \@log_recipients);
    }
    { # text file:
        # log summary data to $logfile, override any previous adapter settings
        Log::Any::Adapter->set(File => $logfile);

        $log->info('DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION');
        $log->info(join ' :: ', @$_)
            for sort test_name_order @$no_clinical_indication;

        $log->info('DIAGNOSIS WITH NO AVAILABLE NGIS CODE & PRESENTATION '
                . 'WITH NO CLINICAL INDICATION');
        $log->info(join ' :: ', @$_)
            for sort test_name_order @$no_available_ngis_code;

        $log->info('UNREPORTED REQUESTS');
        $log->info(join ' :: ', @$_) for @$unreported;

        $log->info('LAB TESTS WITH NO NGIS ID');
        $log->info(join ':: ', $_, $no_ngis_code->{$_})
            for sort keys %{ $no_ngis_code };
    }
}