#!/usr/bin/env perl
=begin -------------------------------------------------------------------------
NGIS data extraction; check ngis_* tables up-to-date
runs at 6am on 1st & 3rd Friday of month via cron:
0 6 1-7 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ...
0 6 15-21 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ...
manual run: $0 [-m <months>, -t <testing: saves files locally, doesn't email>,
-q <query output>, --console <verbose log>, --file <log to file>,
--cumulative <all requests since 1st of month of ref date>]
maps hilis4 lab tests to an NGIS test code using hilis4.ngis_lab_test table:
if lab_test_id entry(s) exist in ngis_lab_test table:
* if single row then use ngis_test_code value (diagnosis doesn't matter so request
doesn't need to be reported)
* if lab_test has multiple ngis_test_codes, the diagnosis clinical indication code
(ngis_diagnosis.ngis_indication_id) is used to determine which (if any)
ngis_test_code to use, where the diagnosis clinical indication must match
one of the available ngis test id's eg HTS myeloid test has matching codes
for a diagnosis of MDS, MPN, etc but not for CML
* if NGIS test code cannot be found on diagnosis, a loose matching on presentation
is attempted, including previous screen terms used, eg 'MDS' matches
'Suspected MDS', 'Follow-up MDS'
* last resort is to force NGIS test code onto some test names (eg HTS myeloid)
* recipient does not want technology (FISH, PCR, HTS, etc) so ETV6/RUNX1 (FISH)
& ETV6-RUNX1 (PCR) potentially ambiguous
* molecular tests are reassembled into panels (if panel has NGIS code)
* FISH panels without NGIS codes are expanded to individual tests
TODO: ngis_test_code list (@$ngis_test_code) with multiple identical indication
codes (eg HTS CLL/MZL, ABL1 kinase p190, etc) - 1st one (in ASCII sort order) is
selected that matches diagnosis indication code, this may not be correct
complications (see geneq_data_extract.log):
* hilis4 lab-tests/panels without ngis test id's - require default codes
* lab tests/panels without a clinical indication mapped to diagnosis or screen
* diagnostic terms without clinical indication (eg 'see comments', 'lab tests only', etc)
* requests not reported when data collected
=cut
#-------------------------------------------------------------------------------
#===============================================================================
my @data_recipients = qw( paul.mcintosh john.fraser raj );
my @log_recipients = qw( turner talley raj );
#===============================================================================
my $query_output = 0; # --query|q - output sql queries to console
my $log_console = 0; # --console - verbose log to console
my $verbose_log = 0; # --verbose - verbose log to file (overrides --console)
my $cumulative = 0; # --cumulative - all requests since 1st of month of ref_date
my $duration = 1; # --month|m - default unless specified in command-line opts
my $testing = 0; # --testing|t - saves file locally, doesn't email
use Getopt::Long;
GetOptions (
"months|m=i" => \$duration, # int
"testing|t" => \$testing, # flag
"query|q" => \$query_output, # flag
"console" => \$log_console, # flag
"verbose" => \$verbose_log, # flag
"cumulative" => \$cumulative, # flag
); # warn $duration; warn $cumulative; exit;
use strict;
use warnings;
use feature 'say';
my $JUST_TESTING = $testing || 0;
use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;
use IO::All;
use Data::Dumper;
use Clone 'clone';
use Data::Printer;
use List::Compare;
use Log::Any qw($log);
use Log::Any::Adapter;
use SQL::Abstract::More;
use List::Util qw(maxstr);
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);
use lib $Bin . '/../../../lib';
use LIMS::Local::ScriptHelpers;
use lib '/home/raj/perl-lib';
use Local::WriteExcel;
use Local::DB;
my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);
my $config = $tools->config();
my $today = $tools->time_now;
my $dbix = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names (now standardised)
$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)
my %STASH; # for logging/debugging
#-------------------------------------------------------------------------------
my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date;
my $filename = sprintf 'GeNEQ_HMDS_data_%s.xls', $ref_date->strftime('%b_%Y'); # warn $filename;
my $subject = sprintf 'GeNEQ HMDS data %s', $ref_date->strftime('%b %Y');
my $logfile = sprintf '%s/geneq_data_extract_%s.log',
$Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y'); # warn $logfile;
# for verbose logging to console (can be set or reset later):
Log::Any::Adapter->set('Stdout') if $log_console;
# for verbose logging to logfile (can be set or reset later):
Log::Any::Adapter->set(File => $logfile) if $verbose_log;
# check cytogenetics
my @lab_sections = (
'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing',
'Multiplex Ligation-dependent Probe Amplification'
);
# skip these lab-tests:
my %excluded_lab_tests = (
FISH => [(
'Christie Myeloma Fusion-1',
'Christie Myeloma Fusion-2',
'Christie Myeloma Screen',
'Cell selection quality',
'FISH H & E review',
'Cytocell CEN11',
'Cytocell CEN12',
'Cytocell CEN17',
'Cytocell CEN18',
'Cytocell CEN7',
'Cytocell CEN8',
'Cytocell CEN9',
'Christie CLL',
'Abbott CEN11',
'Abbott CEN18',
'Abbott CEN7',
'Abbott CEN8',
'PNH myeloid',
'CLL trial',
'EBV ISH',
)],
Molecular => [(
'100KG DNA extraction',
'Sanger sequencing',
'Refer material',
'DNA control',
'ABL control',
)],
);
# tests which are confirmatory and should not be included:
push @{ $excluded_lab_tests{FISH} }, 'Zytolight FGFR3';
push @{ $excluded_lab_tests{Molecular} }, ('SRSF2', 'ASXL1');
# tests not currently on NGIS test directory (needs to be adaptable):
push @{ $excluded_lab_tests{FISH} }, ('ATM', 'Cytocell ATM');
push @{ $excluded_lab_tests{Molecular} }, # panels:
('Burkitt mutation', 'CNS/testicular DLBCL'); # p %excluded_lab_tests;
{ # may need to exclude D/RNA extractions in future:
my @extractions = ( 'DNA extraction', 'RNA extraction', 'CD138+ DNA' );
# push @{ $excluded_lab_tests{Molecular} }, @extractions;
}
# exclude all quantification & store D/RNA tests:
my $excluded_test_name_re = 'quantification|store_[dr]na';
my @excluded_parent_codes = (
'RWM', # Cardiff
'RVA', # Carmarthen
'RT8', # Conwy
'RVF', # Gwent
'RT9', # NE Wales
'RT7', # NW Wales
'RVC', # Swansea
'RQF', # Velidre, Wales
'8EQ15', # Drs Lab
'MSDMDH', # Mater Dei
'X99999', 'V81999' # unknowns
);
# look for loose match with presentation term:
my %presentation_types = (
AML => 'M80',
CML => 'M84',
MDS => 'M82',
MPN => 'M85',
PNH => 'M82',
Hodgkin => 'M95',
'B-ALL' => 'M91',
myeloma => 'M92', # case-sensitive
Aplastic => 'M82',
Cytopenia => 'M82', # MDS
# forcing lymphomas to clinical indication code for high-grade lymphoma (M99):
# TODO: maybe better to force to prehistoric term B-NHL ??
'Probable B-LPD' => 'M99',
'Suspected lymphoma' => 'M99',
'Suspected B-cell lymphoma' => 'M99',
);
# force specific NGIS test codes (last resort) - TODO: replace with defaults for all tests:
my %forced_ngis_codes = (
'BCR-ABL-tests' => 'M84.1', # CML (regex match in try_presentation())
'RUNX1-RUNX1T1' => 'M80.7', # AML
'CBFbeta MYH11' => 'M80.7', # AML
'HTS myeloid' => 'M85.2', # Myeloproliferative neoplasm
'CytoSNP' => 'M82.2', # MDS
);
# molecular lab-tests done on RNA (the rest done on DNA):
my $rna_tests = do {
my @skipped_tests = ( 'FLT3 ITD', 'FLT3 TKD', 'NPM1' ); # can be done on both DNA & RNA
get_rna_tests(\@skipped_tests);
}; # p $rna_tests; exit;
# get all panel tests (omitting excluded tests) for FISH & Moelcular sections:
my $panel_lab_test = get_panel_lab_tests(['FISH','Molecular']); # p $panel_lab_test; exit;
# get lab-tests auto-requested at registration (discontinued):
# my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests;
# get list of section lab test id's (for inter-converting tests & panels):
my $section_lab_test_id = get_section_lab_tests(['FISH', 'Molecular']); # p $section_lab_test_id;
# get mapped HILIS4 lab_test.id => NGIS test code:
my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests;
#-------------------------------------------------------------------------------
my ($sql, @bind) = _get_main_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];
my $query = $dbix->query( $sql, @bind );
# get cols from query, except 'private' ones used for evaluations (eg _datetime_screened):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');
my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);
runtimer('commencing data processing');
# get request_ids and DNA/RNA extraction dates into hash map:
my $extraction_dates = do {
my @request_ids = map $_->{internal_request_id}, @data;
get_extraction_dates_for_requests(\@request_ids);
}; # io('extraction_dates.txt')->appendln($_) for keys %$extraction_dates;
# pre-process data to reconstitute (some) molecular panels:
runtimer('reconstituting molecular panels');
@data = reconstitute_molecular_panels(\@data); # p @data;
# pre-process data to split (some) FISH panels (too complex to combine with above):
runtimer('splitting FISH panels');
@data = split_fish_panels(\@data); # p @data;
runtimer('finished pre-processing data');
# process @data:
for my $ref (@data) {
my $data = process_data($ref); # returns arrayref
$xl->write_row($data);
}
runtimer('completed data processing');
if ($JUST_TESTING) {
$xl->save(join '/', $Bin, $filename);
}
else {
my %mail = (
attachment => $xl->data,
filename => $filename,
subject => $subject,
config => $config,
);
$tools->send_mail(\%mail, \@data_recipients);
}
# log all misfits:
_do_log();
#===============================================================================
sub runtimer {
return unless $JUST_TESTING;
say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday];
}
sub debug {
return unless $JUST_TESTING;
say $_[0];
}
sub process_data {
my $ref = shift; # href
# NGIS test code:
$ref->{test_id} = get_ngis_test_code($ref);
# determine date lab-test requested (replaced by datetime_order_received):
# $ref->{datetime_test_requested} = get_test_request_date($ref); # will need to be revived
# set datetime_testing_complete to test completion datetime:
$ref->{datetime_testing_complete} = $ref->{_datetime_test_completed};
{ # set datetime_reporting_complete to later of testing_complete or authorisation:
if ( $ref->{_datetime_authorised} ) {
my @dates = @{$ref}{ qw/_datetime_authorised _datetime_test_completed/ };
# dates in yyyy-mm-dd format can be handled as strings for determination
# of chronology using List::Util::maxstr or could do $dateA gt $dateB:
$ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util
}
else { # not authorised, set to _datetime_test_completed:
$ref->{datetime_reporting_complete} = $ref->{_datetime_test_completed}
}
}
=begin # datetime_processing_complete now set to D/RNA extraction date
# set datetime_processing_complete to later of authorisation date or test
# completion date; value will be null if request not yet authorised:
# p $ref->{sample_processed_date}; p $ref->{_datetime_authorised};
if ( $ref->{_datetime_authorised} ) {
my @dates = @{$ref}{ qw/_datetime_authorised datetime_testing_complete/ };
# dates in yyyy-mm-dd format can be handled as strings for determination
# of chronology using List::Util::maxstr or could do $dateA gt $dateB:
$ref->{datetime_processing_complete} = maxstr(@dates); # List::Util
} # else sample_completed_date remains null
=cut
$ref->{datetime_processing_complete} = get_processing_date($ref);
# new template wants some cols temporarily NULL (commented out in query now):
my @null_fields = qw//;
$ref->{$_} = undef for @null_fields;
return [ @{$ref}{@headers} ];
}
sub _main_query_cols {
my @cols = (
'CONCAT_WS("/", r.request_number, r.year - 2000)|local_sample_id',
# 'NULL|local_sample_id_sub',
# 'NULL|local_dna_number',
# 'NULL|local_dna_number_sub',
'p.id|internal_patient_id',
'r.id|internal_request_id',
q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
q!"GeNEQ Leeds (HILIS)"|testing_laboratory!,
q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!,
# 'NULL|ped_number',
# 'p.first_name|patient_first_name',
# 'p.last_name|patient_last_name',
# 'p.dob|patient_dob',
'p.gender|patient_sex',
# 'p.nhs_number',
# 'pd.address|patient_address',
# 'NULL|patient_district_of_residence',
'NULL|patient_city',
# 'pd.post_code|patient_postcode',
# 'NULL|patient_country',
'ref.national_code|referrer_ods_code',
# 'NULL|referrer_email',
# 'NULL|referrer_telephone',
# 'NULL|referrer_title',
# 'NULL|referrer_first_name',
# 'ref.name|referrer_last_name',
'hd.display_name|referrer_specialty',
# 'rs.display_name|referring_facility_name',
# 'NULL|referrer_address',
# 'NULL|referrer_city',
# 'NULL|referrer_district',
# 'NULL|referrer_postcode',
# 'NULL|referrer_county',
'rs.organisation_code|referring_facility_ods_code',
# 'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b
# 'NULL|other_unit_number',
# 'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b
q!"Routine"|test_priority!,
'NULL|test_id', # NGIS test id (eg M80.6)
'lt.field_label|test_name',
# 'ls.section_name|lab_section',
'NULL|ngis_id', # NGIS order number ?
# 'NULL|datetime_test_requested', # replaced by datetime_order_received
# 'lt.test_type|request_comments', # panel or test
q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ')
AS sample_type_received!,
'rsd.specimen_date|datetime_sample_collected', # datetime
'r.created_at|datetime_sample_received', # datetime
'r.created_at|datetime_order_received', # datetime
'NULL|datetime_processing_complete', # set later
'NULL|datetime_testing_complete', # set later
'NULL|datetime_reporting_complete', # set later
'rsd.specimen_quality|sample_condition',
# q!MAX( CASE WHEN ao.option_name = 'doi'
# THEN 'yes' ELSE 'no' END ) AS infection_risk!,
# 'NULL|sample_type_dispatched',
# 'IF( rst.volume, "ul", NULL )|sample_units',
# 'rst.volume|sample_amount',
# q!IF( rst.concentration < 9999, rst.concentration, NULL )
# AS dna_concentration!,
# 'NULL|sample_date_sent',
# 'rst.vialId|sample_tube_id',
# 'sr.plateId|sample_rack_id',
# 'NULL|sample_courier',
# 'NULL|sample_courier_ref',
# 'NULL|sample_processed_date', # renamed datetime_processing_complete
# 'NULL|sample_completed_date', # renamed datetime_testing_complete
# fields used for data processing only (underscored for omission in @headers):
'lt.id|_lab_test_id',
'lt.test_type|_test_type',
'lt.test_name|_test_name',
'ls.section_name|_section_name',
'd.name|_diagnosis',
'nci.code|_clinical_indication_code',
's2.description|_presentation',
'rrv.diagnosis_id|_diagnosis_id',
# q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END )
# AS _datetime_screened!, # not used
# q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END )
# AS _datetime_reported!, # not used
q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END )
AS _datetime_authorised!,
'ts.time|_datetime_test_completed', # date lab-test set to complete
);
return wantarray ? @cols : \@cols;
}
sub _get_main_query {
my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;
my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month
my %date_restriction = $cumulative # if --cumulative Getopt:
? ( '>=', $begin_date->ymd ) # all since ref_date
: ( -between => [ # first & last day of ref_date month:
$begin_date->ymd, # 1st day
$begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day
] );
my $excluded_lab_test_ids = get_excluded_lab_test_ids();
my $cols = _main_query_cols(); # these keep changing
my @rels = ( 'requests|r' ,
q{r.patient_case_id=pc.id} => 'patient_case|pc' ,
q{pc.patient_id=p.id} => 'patients|p' ,
q{pc.referral_source_id=rs.id} => 'referral_sources|rs' ,
q{rs.parent_organisation_id=po.id} => 'parent_organisations|po' ,
q{r.referrer_department_id=rd.id} => 'referrer_department|rd' ,
q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd' ,
q{rd.referrer_id=ref.id} => 'referrers|ref' ,
q{rsd.request_id=r.id} => 'request_specimen_detail|rsd' ,
q{rsp.request_id=r.id} => 'request_specimen|rsp' ,
q{rsp.specimen_id=s.id} => 'specimens|s' ,
q{ts.request_id=r.id} => 'request_lab_test_status|ts' ,
q{ts.status_option_id=so.id} => 'lab_test_status_options|so' ,
q{ts.lab_test_id=lt.id} => 'lab_tests|lt' ,
q{lt.lab_section_id=ls.id} => 'lab_sections|ls' ,
q{rsv.request_id=r.id} => 'request_status_view|rsv' ,
q{ris.request_id=r.id} => 'request_initial_screen|ris' ,
q{ris.screen_id=s2.id} => 'screens|s2' ,
# left joins:
q{=>rrv.request_id=r.id} => 'request_report_view|rrv' ,
q{=>rrv.diagnosis_id=d.id} => 'diagnoses|d' ,
q{=>nd.diagnosis_id=d.id} => 'ngis_diagnosis|nd' ,
q{=>nd.ngis_indication_id=nci.id} => 'ngis_clinical_indications|nci',
q{=>ro.request_id=r.id} => 'request_option|ro' ,
q{=>ro.option_id=ao.id} => 'additional_options|ao' ,
q{=>pd.patient_id=p.id} => 'patient_demographics|pd' ,
q{=>rst.request_id=r.id} => 'request_storage|rst' ,
q{=>rst.rack_id=sr.id} => 'storage_racks|sr' ,
q{=>rt.request_id=r.id} => 'request_trial|rt' ,
);
my %where = (
-and => [ # repeated elements (eg parent_code), so requires arrayref
'rt.request_id' => undef, # not in clinical trial
'so.description' => 'complete', # lab-test status
'ls.section_name' => { -in => \@lab_sections },
'rs.organisation_code' => { -not_rlike => '^NT' }, # private hospital
'po.parent_code' => { -not_rlike => '^(S|Z)' }, # scotland/ireland
'po.parent_code' => { -not_in => \@excluded_parent_codes },
'DATE(ts.time)' => \%date_restriction,
'lt.test_name' => { -not_rlike => $excluded_test_name_re },
'lt.id' => { -not_in => $excluded_lab_test_ids },
# to restrict to specific request id's:
# 'r.id' => { -in => [395682] },
],
);
my @args = (
-columns => $cols,
-from => [ -join => @rels ],
-where => \%where,
-group_by => [ 'lt.id', 'r.id' ],
-having => { $is_private => undef }, # not private patient
-order_by => 'r.id',
#-limit => 100,
#-offset => 100,
); # p @args;
# need to supply our own injection_guard to override built-in sql injection
# attack prevention which detects ';' in GROUP_CONCAT, but we don't need one
# here as we have no user input:
my $sqla = SQL::Abstract::More->new( injection_guard => qr/^$/ );
my ($sql, @bind) = $sqla->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
return ($sql, @bind);
}
sub get_excluded_lab_test_ids {
my @test_ids;
my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
my %args = (
-columns => 'lt.id',
-from => [ -join => @rels ],
);
while ( my($section, $ref) = each %excluded_lab_tests ) {
$args{'-where'} = {
'ls.section_name' => $section,
'lt.field_label' => { -in => $ref },
};
my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
my @ids = $dbix->query($sql, @bind)->column; # ref
push @test_ids, @ids;
} # p @test_ids;
return wantarray ? @test_ids : \@test_ids;
}
sub get_extraction_dates_for_requests {
my $request_ids = shift;
my @cols = (
'ts.request_id',
q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/,
'ts.time',
);
my @rels = ( 'request_lab_test_status|ts',
'ts.lab_test_id=lt.id' => 'lab_tests|lt' ,
'ts.status_option_id=so.id' => 'lab_test_status_options|so',
);
my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna );
my %where = (
'so.description' => 'complete',
'lt.test_name' => { -in => \@extraction_tests },
'ts.request_id' => { -in => $request_ids },
);
my @args = (
-columns => \@cols,
-from => [ -join => @rels ],
-where => \%where,
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
my $query = $dbix->query( $sql, @bind );
my %h;
while ( my $ref = $query->hash ) {
my $request_id = $ref->{request_id};
my $datetime = $ref->{time};
my $type = $ref->{type};
$h{$request_id}{$type} = $datetime;
}
return \%h;
}
sub get_processing_date {
my $ref = shift;
# FISH tests not done on D/RNA extraction so can't get a processing date:
return if $ref->{_section_name} eq 'FISH';
my $test_name = $ref->{_test_name}; # p $test_name;
my $lab_num = $ref->{local_sample_id};
my $req_id = $ref->{internal_request_id}; # p $req_id;
# get extraction dates for DNA and/or RNA:
my $extraction = $extraction_dates->{$req_id}; # p $extraction;
debug("no extraction for $lab_num $test_name") and return
unless $extraction;
# get expected material, RNA if test_name in %rna_tests, otherwise assume
# DNA, except a small minority of tests done on CD138-selected DNA (see below):
my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA';
# get extraction date for test material:
my $date = $extraction->{$material};
# some tests (eg MLPA myeloma) are done on CD138-selected DNA -
# key = CD138DNA in %extraction_dates:
$date ||= $extraction->{CD138DNA}; # or give up - no extraction date available:
debug("no extraction date for $lab_num $test_name $material") and return
unless $date;
return $date;
}
=begin # not used - replaced by datetime_order_received
sub get_test_request_date {
my $ref = shift; # p $ref;
my $request_id = $ref->{internal_request_id};
my $registered = $ref->{datetime_sample_received};
my $test_name = $ref->{test_name};
my $screened = $ref->{_datetime_screened};
# use manual request date if exists, or auto-request date, or date of screening:
my $manual_request_date = get_manual_test_request($request_id, $test_name);
# auto-requested by sample-type, therefore date = registration:
my $auto_request_date = $registration_lab_tests->{$test_name}
? $registered : 0; # zero OK as will be tested for truth below
# p [$manual_request_date, $auto_request_date, $screened];
return $manual_request_date || $auto_request_date || $screened;
}
=cut
=begin # not used:
sub get_registration_lab_test_requests {
my @cols = ( 'DISTINCT t2.field_label', 1 );
my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2');
my @args = (
-columns => \@cols,
-from => [ -join => @rels ],
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
return $dbix->query($sql, @bind)->map;
}
=cut
sub get_rna_tests {
my $skipped_tests = shift;
my @rels = ( 'linked_lab_test|llt' ,
'llt.parent_test_id=lt1.id' => 'lab_tests|lt1' ,
'llt.linked_test_id=lt2.id' => 'lab_tests|lt2' ,
);
my @args = (
-columns => [ 'lt1.test_name', 1 ],
-from => [ -join => @rels ],
-where => {
'lt1.field_label' => { -not_in => $skipped_tests },
'lt2.test_name' => 'rna_extraction',
},
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
my $map = $dbix->query( $sql, @bind )->map;
# add rna_extraction in case extraction tests are required in data return:
$map->{rna_extraction} = 1;
return $map;
}
=begin # called from get_test_request_date(), not used
sub get_manual_test_request {
my ($request_id, $test_name) = @_;
my @args = (
-columns => [ 'time' ],
-from => [ 'request_lab_test_history' ],
-where => {
request_id => $request_id,
action => {
-rlike => '^(auto-)?requested( linked test)? ' . $test_name
},
},
-order_by => [ '-time' ], # ORDER BY time DESC
-limit => 1, # returns most recent only if >1
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
# $dbix->dump_query($sql, @bind) if $query_output; # exit;
return $dbix->query($sql, @bind)->value;
}
=cut
sub get_ngis_test_code {
my $ref = shift; # p $ref if $ref->{test_name} =~ /Burkitt mutation/;
my $presentation = $ref->{_presentation};
my $diagnosis_id = $ref->{_diagnosis_id};
my $lab_test_id = $ref->{_lab_test_id};
my $lab_section = $ref->{_section_name};
my $test_name = $ref->{test_name}; # say $test_name;
my $test_type = $ref->{_test_type};
my $diagnosis = $ref->{_diagnosis} || '';
my $lab_num = $ref->{local_sample_id};
my $presentation_diagnosis = join '::', $presentation,
$diagnosis || '[NOT REPORTED]'; # p $presentation_diagnosis;
# get NGIS code(s) for this lab-test:
my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref
unless ($ngis_test_code) {
my $title =join ' :: ', $test_name, $test_type, $lab_section;
$STASH{no_ngis_code}{$title}++;
# debug("$lab_num: no NGIS id for $test_name");
return $test_name !~ /[DR]NA extraction|CD138\+ DNA/ # return undef if extraction
? $presentation_diagnosis
: undef;
}
# return NGIS test code if it's the only one (even if not supported clinical
# indication; TODO: this might not be suitable, might need to consider diagnosis):
return $ngis_test_code->[0] if scalar @$ngis_test_code == 1;
# now have multiple NGIS id's for lab-test
# common message for debugging:
my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!;
# vars for try_presentation():
my %vars_for_presentation = ( data => $ref, ngis_code => $ngis_test_code );
# now need to examine diagnosis:
if (! $diagnosis_id) {
if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
# debug("found $ngis_code from $presentation");
return $ngis_code;
}
# io($log)->appendln("$msg and request is not reported");
push @{ $STASH{unreported} }, [ $lab_num, $test_name, $presentation ];
return $presentation_diagnosis;
}
# get clinical indication for diagnosis (eg AML NOS = M80)
my $indication = $ref->{_clinical_indication_code}; # p $indication;
if (! $indication) { # debug('no clinical indication');
if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
# debug("found $ngis_code from $presentation");
return $ngis_code;
}
#io($log)->appendln("$msg and '$diagnosis' has no clinical indication id"
# . " [presentation: $presentation]");
push @{ $STASH{no_clinical_indication} },
[ $lab_num, $test_name, $presentation, $diagnosis ];
return $presentation_diagnosis;
} # p $indication; p $ngis_ids;
# return NGIS code from list if has a matching clinical indication:
for my $ngis_code (@$ngis_test_code) { # warn $ngis_code;
# debug("$lab_num: $indication -> $ngis_code") if $ngis_code =~ /^$indication/;
return $ngis_code if $ngis_code =~ /^$indication/;
}
# last resort is to try to find NGIS code from presentation:
if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) {
# debug("found $ngis_code from $presentation");
return $ngis_code;
}
# can't find an NGIS code for lab-test from presentation or diagnosis:
#io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] "
# . "did not match any available NGIS test codes [presentation: $presentation]");
# debug("no NGIS id available for $test_name");
push @{ $STASH{no_available_ngis_code} },
[ $lab_num, $test_name, $diagnosis, $indication, $presentation ];
# return undef;
return $presentation_diagnosis;
}
sub try_presentation { # try to look for a loose match with presentation term:
my $args = shift;
my $presentation = $args->{data}->{_presentation};
my $request_id = $args->{data}->{internal_request_id};
my $ngis_codes = $args->{ngis_code};
my $test_name = $args->{data}->{test_name};
# p $presentation; p $ngis_codes; p $test_name;
# get any previous screening terms from history file:
my @all_screens = get_request_history($request_id);
push @all_screens, $presentation; # p @all_screens;
for my $screen (@all_screens) {
for my $type ( keys %presentation_types ) { # p $type; # eg AML, MDS
if ( $screen =~ /$type/ ) { # eg Suspected MDS =~ /MDS/
# debug("$screen matches $type");
for my $code (@$ngis_codes) { # eg M84.2
# p $code; p $presentation_types{$type};
if ( $code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/
# debug("$code matches $presentation_types{$type}");
return $code;
}
}
}
}
}
{ # if we get this far, no other match possible, force NGIS code if test
# configured in %forced_ngis_codes:
for my $test ( keys %forced_ngis_codes ) {
return $forced_ngis_codes{$test} if $test_name eq $test;
}
# BCR-ABL p210 RQ or BCR-ABL multiplex:
return $forced_ngis_codes{'BCR-ABL-tests'} if $test_name =~ /BCR-ABL/;
}
return undef;
}
sub get_request_history {
my $request_id = shift; # p $request_id;
my $query = $dbix->select('request_history', 'action',
{
request_id => $request_id,
action => { -like => 'updated initial screen entry %' }
}
);
my @screens;
while ( my $action = $query->value ) { # p $action;
if ( $action =~ /updated initial screen entry (.*)/ ) {
push @screens, $1;
}
}
return wantarray ? @screens : \@screens;
}
sub get_ngis_lab_tests {
my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]);
my %h;
while ( my $ref = $q->array ) { # p $ref;
my $lab_test_id = $ref->[0];
push @{ $h{$lab_test_id} }, $ref->[1];
}
return \%h;
}
sub split_fish_panels { # only some FISH panels (those without an NGIS map):
my $data = shift; # p $data;
my $required_fish_panels = get_fish_panel_names_without_ngis_codes(); # p $required_fish_panels;
my @fish_panel_lab_tests = @{ $panel_lab_test->{FISH} };
my %non_ngis_fish_panels;
# skip panels from $fish_panel_lab_test with NGIS codes:
for my $ref (@fish_panel_lab_tests) { # AoA
my $panel_name = $ref->[0]; # p $panel_name;
next unless grep { $panel_name eq $_ } @$required_fish_panels;
# debug("$panel_name in required_fish_panels");
$non_ngis_fish_panels{$panel_name} = $ref->[1];
} # p %non_ngis_fish_panels;
# coderef for substituting test names (eg "DLBCL + FL" panel -> "DAKO MYC")
my $substitute_values = sub {
my ($row, $new_test_name) = @_;
$row->{test_name} = $new_test_name;
# also need to change lab_test_id for finding NGIS test code:
$row->{_lab_test_id} = $section_lab_test_id->{FISH}->{$new_test_name};
$row->{_test_type} = 'test'; # for log in case it's unmapped
};
ROW:
for my $row (@$data) {
my $test_name = $row->{test_name};
my $lab_num = $row->{local_sample_id};
# if test name matches one of FISH panels without an NGIS code:
if ( grep $test_name eq $_, keys %non_ngis_fish_panels ) {
# get FISH panel test names:
my @panel_tests = @{ $non_ngis_fish_panels{$test_name} };
{ # change test_name to 1st element of $panel_tests
my $new_test_name = shift @panel_tests;
debug("$lab_num: changing $test_name to $new_test_name");
&$substitute_values($row,$new_test_name);
}
# now clone $row for each remaining element in @panel_tests:
TEST:
for my $t (@panel_tests) {
my $new = clone $row;
# repeat above substitutions:
debug("$lab_num: adding new row with test-name $t");
&$substitute_values($new,$t);
push @$data, $new;
}
}
}
# re-sort by lab_number (@data now out of sequence after pushing new rows)
my @new = sort { $a->{internal_request_id} <=> $b->{internal_request_id} }
@data;
return wantarray ? @new : \@new;
}
sub get_fish_panel_names_without_ngis_codes {
my @rels = ( 'lab_tests|lt',
'lt.lab_section_id=ls.id' => 'lab_sections|ls',
'=>nlt.lab_test_id=lt.id' => 'ngis_lab_test|nlt'
);
my %where = (
'ls.section_name' => 'FISH',
'lt.test_type' => 'panel',
'lt.field_label' => { -not_in => $excluded_lab_tests{FISH} },
'nlt.lab_test_id' => undef,
);
my @args = (
-columns => 'lt.field_label',
-from => [ -join => @rels ],
-where => \%where,
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
return $dbix->query( $sql, @bind )->column;
}
sub reconstitute_molecular_panels {
my $data = shift; # p $data;
# new data file:
my @new;
# keep track of request_id:
my $last_request_id = 0;
# holder for molecular tests data:
my @molecular_tests = (); # will be destroyed and recreated after each request
ROW:
for my $row (@$data) {
my $lab_section = $row->{_section_name};
my $request_id = $row->{internal_request_id};
# if new request, handle molecular tests data, if any:
if ( @molecular_tests && $request_id != $last_request_id ) {
$log->info("molecular tests for $last_request_id:",
{ set => [ map $_->{test_name}, @molecular_tests ] } );
my @rows = molecular_tests_to_panels(\@molecular_tests); # p @rows;
push @new, @rows;
@molecular_tests = (); # reset for next request
}
push @new, $row and next ROW if $lab_section ne 'Molecular';
# have molecular data row:
push @molecular_tests, $row;
# set last_request_id to current request_id:
$last_request_id = $request_id;
} # p %tests;
return wantarray ? @new : \@new;
}
sub molecular_tests_to_panels {
my $ref = shift; # AoH's
my @molecular_panel_lab_tests = @{ $panel_lab_test->{Molecular} };
my @all_molecular_tests = map $_->{test_name}, @$ref;
# say 'test_names:'; p @all_molecular_tests;
# panels ordered in size of array, largest 1st to prevent matches with
# smaller panels having common members (eg AML 60+ & AML under-60):
PANEL:
for my $t (@molecular_panel_lab_tests) { # eg AML 60+ => [NPM1, FLT3 ITD]
my ($panel_name, $panel_tests) = @$t; # p @$panel_tests;
# don't need ordered list, use -u flag for speed boost:
my $lc = List::Compare->new('-u', $panel_tests, \@all_molecular_tests);
# if this panel list is a subset of all_molecular_tests:
if ( $lc->is_LsubsetR ) { # is left-a-subset-of-right ?
$log->info("$panel_name panel found in molecular set:",
{ panel_tests => $panel_tests } );
my @set = @$panel_tests; # localise for element substitutions
my %seen = (); # reset test name stash for this panel
# remove 1st element in @set into a variable:
my $first_element_of_set = shift @set;
# debug("first_element_of_set: $first_element_of_set");
# for test name of 1st entry in $ref matching 1st element of @set,
# change it to the matched panel_name & flag the rest for deletion:
DATA:
for my $d (@$ref) { # p $d; # data href
next DATA if $d->{DELETE_ME}; # already flagged for deletion
my $test_name = $d->{test_name}; # p $test_name;
my $lab_num = $d->{local_sample_id};
# debug("$lab_num: testing whether $test_name eq $first_element_of_set");
if ( $test_name eq $first_element_of_set ) {
$log->info("changing $test_name to $panel_name");
debug("$lab_num: changing $test_name to $panel_name");
$d->{test_name} = $panel_name;
$d->{_test_type} = 'panel'; # for log in case it's unmapped
$d->{_lab_test_id} # for finding NGIS test code:
= $section_lab_test_id->{Molecular}->{$panel_name};
}
elsif ( grep $test_name eq $_, @set ) {
# set a flag for later (only once per test in case it's
# re-requested or is part of another panel):
next DATA if $seen{$test_name}++;
$log->info("$test_name is in $panel_name set, flagging for deletion");
# debug("$test_name is in $panel_name set, flagging for deletion")
$d->{DELETE_ME}++;
}
}
}
} # p $ref;
$log->info('=' x 30);
# return original hashrefs not flagged for deletion:
my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data;
return wantarray ? @data : \@data;
}
sub get_section_lab_tests {
my $lab_sections = shift; # aref
my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' );
my %args = (
-columns => [ qw/lt.field_label lt.id/ ],
-from => [ -join => @rels ],
-where => { 'ls.section_name' => undef }, # defined in block
); # p @args;
my %h;
for my $section (@$lab_sections) {
$args{'-where'}{'ls.section_name'} = $section;
my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
$h{$section} = $dbix->query( $sql, @bind )->map;
} # p \%h;
return \%h;
}
sub get_panel_lab_tests {
my $lab_section = shift; # aref
my @cols = (
'lt1.field_label|panel',
'lt2.field_label|test',
);
my @rels = ( 'panel_lab_test|plt' ,
'plt.panel_test_id=lt1.id' => 'lab_tests|lt1' ,
'plt.lab_test_id=lt2.id' => 'lab_tests|lt2' ,
'lt1.lab_section_id=ls.id' => 'lab_sections|ls' ,
);
my %panel_tests;
for my $section (@$lab_section) {
# need to *locally* exclude DNA extraction, if not already excluded:
my @local_excluded_tests = @{ $excluded_lab_tests{$section} };
push @local_excluded_tests, 'DNA extraction'
unless grep { $_ eq 'DNA extraction' } @local_excluded_tests;
my %where = (
'lt1.field_label' => { -not_in => \@local_excluded_tests },
'lt2.field_label' => { -not_in => \@local_excluded_tests },
'ls.section_name' => $section,
'lt1.is_active' => 'yes',
);
my @args = (
-columns => \@cols,
-from => [ -join => @rels ],
-where => \%where,
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
my $query = $dbix->query( $sql, @bind );
my %h;
while ( my $ref = $query->hash ) {
my $panel = $ref->{panel};
push @{ $h{$panel } }, $ref->{test};
} # p %h;
# re-arrange in order of size of panel, largest 1st:
my @data = map [ +($_ => $h{$_}) ],
sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data;
$panel_tests{$section} = \@data;
}
return \%panel_tests;
}
sub test_name_order { # p $a; p $b;
my ($req_num_a, $yr_a) = split '/', $a->[0];
my ($req_num_b, $yr_b) = split '/', $b->[0];
my $test_name_a = $a->[1];
my $test_name_b = $b->[1];
return $test_name_a cmp $test_name_b
|| $yr_a <=> $yr_b || $req_num_a <=> $req_num_b;
}
sub _do_log { # p %STASH;
my $no_ngis_code = $STASH{no_ngis_code};
my $unreported = $STASH{unreported};
my $no_clinical_indication = $STASH{no_clinical_indication};
my $no_available_ngis_code = $STASH{no_available_ngis_code};
my $filename = sprintf '%s/geneq_data_extract_log_%s.xlsx',
$Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y');
my $xl = Local::WriteExcel->new( filename => $filename ); # p $xl;
if ($no_clinical_indication) {
$xl->worksheet_name('no clinical indication');
my @cols = qw( lab_num test_name presentation diagnosis );
$xl->write_bold_row(\@cols);
$xl->write_row($_) for @$no_clinical_indication;
}
if ($no_available_ngis_code) {
$xl->worksheet_name('no available ngis test-code');
my @cols = qw( lab_num test_name diagnosis clinical_indication
presentation );
$xl->write_bold_row(\@cols);
$xl->write_row($_) for @$no_available_ngis_code;
}
if ($unreported) {
$xl->worksheet_name('unreported');
$xl->write_bold_row([ qw/lab_num test_name presentation/] );
$xl->write_row($_) for @$unreported;
}
if ($no_ngis_code) { # p $no_ngis_code;
$xl->worksheet_name('no ngis test-code');
$xl->write_bold_row([ qw/test_name test_type lab_section count/ ] );
$xl->write_row([ (split ' :: ', $_ ), $no_ngis_code->{$_} ])
for sort keys %{ $no_ngis_code };
}
if ($JUST_TESTING) {
$xl->save();
}
else {
my $subject = 'GeNEQ data extraction log '
. $ref_date->strftime('%b %Y'); # p $subject;
# capture filename from its full path:
$filename =~ /(geneq_data_extract_log_.*)/; # p $1;
my %mail = (
attachment => $xl->data,
filename => $1, # captured from above
subject => $subject,
config => $config,
);
$tools->send_mail(\%mail, \@log_recipients);
}
{ # text file:
# log summary data to $logfile, override any previous adapter settings
Log::Any::Adapter->set(File => $logfile);
$log->info('DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION');
$log->info(join ' :: ', @$_)
for sort test_name_order @$no_clinical_indication;
$log->info('DIAGNOSIS WITH NO AVAILABLE NGIS CODE & PRESENTATION '
. 'WITH NO CLINICAL INDICATION');
$log->info(join ' :: ', @$_)
for sort test_name_order @$no_available_ngis_code;
$log->info('UNREPORTED REQUESTS');
$log->info(join ' :: ', @$_) for @$unreported;
$log->info('LAB TESTS WITH NO NGIS ID');
$log->info(join ':: ', $_, $no_ngis_code->{$_})
for sort keys %{ $no_ngis_code };
}
}