#!/usr/bin/env perl =begin ------------------------------------------------------------------------- NGIS data extraction; check ngis_* tables up-to-date runs at 6am on 1st & 3rd Friday of month via cron: 0 6 1-7 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ... 0 6 15-21 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ... manual run: $0 options: -m = months -t = testing: saves files locally, doesn't email -q = query output --cumulative = all requests since 1st of month of ref date --console = verbose log to console --verbose = verbose log to file (overrides console) maps hilis4 lab tests to an NGIS test code using hilis4.ngis_lab_test table: if lab_test_id entry(s) exist in ngis_lab_test table: * if single row then use ngis_test_code value (diagnosis doesn't matter so request doesn't need to be reported) * if lab_test has multiple ngis_test_codes, the diagnosis clinical indication code (ngis_diagnosis.ngis_indication_id) is used to determine which (if any) ngis_test_code to use, where the diagnosis clinical indication must match one of the available ngis test id's eg HTS myeloid test has matching codes for a diagnosis of MDS, MPN, etc but not for CML * if NGIS test code cannot be found on diagnosis, a loose matching on presentation is attempted, including previous screen terms used, eg 'MDS' matches 'Suspected MDS', 'Follow-up MDS' * last resort is to force NGIS test code onto some test names (eg HTS myeloid) * recipient does not want technology (FISH, PCR, HTS, etc) so ETV6/RUNX1 (FISH) & ETV6-RUNX1 (PCR) potentially ambiguous * molecular tests are reassembled into panels (if panel has NGIS code) * FISH panels without NGIS codes are expanded to individual tests TODO: for molecular panels when 1 or more tests set complete in different months, panel not recognised and separate test codes returned eg 20536/19 complications (see geneq_data_extract.log): * hilis4 lab-tests/panels without ngis test id's - require default codes * lab tests/panels without a clinical indication mapped to diagnosis or screen * diagnostic terms without clinical indication (eg 'see comments', 'lab tests only', etc) * requests not reported when data collected =cut #------------------------------------------------------------------------------- #=============================================================================== my @data_recipients = qw( paul.mcintosh john.fraser raj ); my @log_recipients = qw( turner talley raj ); #=============================================================================== my $query_output = 0; # --query|q - output sql queries to console my $log_console = 0; # --console - verbose log to console my $verbose_log = 0; # --verbose - verbose log to file (overrides --console) my $cumulative = 0; # --cumulative - all requests since 1st of month of ref_date my $duration = 1; # --month|m - default unless specified in command-line opts my $testing = 0; # --testing|t - saves file locally, doesn't email use Getopt::Long; GetOptions ( "months|m=i" => \$duration, # int "testing|t" => \$testing, # flag "query|q" => \$query_output, # flag "console" => \$log_console, # flag "verbose" => \$verbose_log, # flag "cumulative" => \$cumulative, # flag ); # warn $duration; warn $cumulative; exit; use strict; use warnings; use feature 'say'; my $JUST_TESTING = $testing || 0; use lib '/home/raj/perl5/lib/perl5'; use FindBin qw($Bin); # warn $Bin; use IO::All; use Data::Dumper; use Clone 'clone'; use Data::Printer; use List::Compare; use Log::Any qw($log); use Log::Any::Adapter; use SQL::Abstract::More; use List::Util qw(maxstr); use Spreadsheet::WriteExcel::Simple; use Time::HiRes qw(gettimeofday tv_interval); use lib $Bin . '/../../../lib'; use LIMS::Local::ScriptHelpers; use lib '/home/raj/perl-lib'; use Local::WriteExcel; use Local::DB; my $tools = LIMS::Local::ScriptHelpers->new(); $tools->test_only($JUST_TESTING); my $config = $tools->config(); my $today = $tools->time_now; my $dbix = Local::DB->dbix({ dbname => 'hilis4' }); $dbix->lc_columns = 0; # preserve mixed case on col names (now standardised) $Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col) my %STASH; # for logging/debugging #------------------------------------------------------------------------------- my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date; my $filename = sprintf 'GeNEQ_HMDS_data_%s.xls', $ref_date->strftime('%b_%Y'); # warn $filename; my $subject = sprintf 'GeNEQ HMDS data %s', $ref_date->strftime('%b %Y'); my $logfile = sprintf '%s/geneq_data_extract_%s.log', $Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y'); # warn $logfile; # for verbose logging to console (can be set or reset later): Log::Any::Adapter->set('Stdout') if $log_console; # for verbose logging to logfile (can be set or reset later): Log::Any::Adapter->set(File => $logfile) if $verbose_log; # check cytogenetics my @lab_sections = ( 'FISH', 'Molecular', 'Micro-array', 'High-throughput sequencing', 'Multiplex Ligation-dependent Probe Amplification' ); # skip these lab-tests: my $excluded_lab_tests = $tools->get_yaml_file('ngis_excluded_lab_tests'); # p $excluded_lab_tests; exit; { # may need to exclude D/RNA extractions in future: my @extractions = ( 'DNA extraction', 'RNA extraction', 'CD138+ DNA' ); # push @{ $excluded_lab_tests->{Molecular} }, @extractions; } # exclude all quantification & store D/RNA tests: my $excluded_test_name_re = 'quantification|store_[dr]na'; my @excluded_parent_codes = ( 'RWM', # Cardiff 'RVA', # Carmarthen 'RT8', # Conwy 'RVF', # Gwent 'RT9', # NE Wales 'RT7', # NW Wales 'RVC', # Swansea 'RQF', # Velidre, Wales '8EQ15', # Drs Lab 'MSDMDH', # Mater Dei 'X99999', 'V81999' # unknowns ); # look for loose match with presentation term: my %presentation_types = ( AML => 'M80', CLL => 'M94', CML => 'M84', MDS => 'M82', MPN => 'M85', PNH => 'M82', Hodgkin => 'M95', 'B-ALL' => 'M91', myeloma => 'M92', # case-sensitive Aplastic => 'M82', Cytopenia => 'M82', # MDS # forcing lymphomas to clinical indication code for high-grade lymphoma (M99): # TODO: maybe better to force to prehistoric term B-NHL ?? 'Probable B-LPD' => 'M99', 'Suspected lymphoma' => 'M99', 'Suspected B-cell lymphoma' => 'M99', ); # force specific NGIS test codes (last resort): my %forced_ngis_codes = ( 'Cytocell CCND1/IGH' => 'M102.2', # MCL # for confirmation: # 'Cytocell MECOM' => 'XTR26.a', # rare-disease temp code (breaks table joins if used in db) 'BCR-ABL-tests' => 'M84.1', # CML - regex match in try_presentation() 'RUNX1-RUNX1T1' => 'M80.7', # AML 'CBFbeta MYH11' => 'M80.7', # AML 'HTS myeloid' => 'M82.1', # MDS 'CytoSNP' => 'M82.2', # MDS ); # molecular lab-tests done on RNA (the rest done on DNA): my $rna_tests = do { my @skipped_tests = ( 'FLT3 ITD', 'FLT3 TKD', 'NPM1' ); # can be done on both DNA & RNA get_rna_tests(\@skipped_tests); }; # p $rna_tests; exit; # get all panel tests (omitting excluded tests) for FISH & Moelcular sections: my $panel_lab_test = get_panel_lab_tests(['FISH','Molecular']); # p $panel_lab_test; exit; # get lab-tests auto-requested at registration (discontinued): # my $registration_lab_tests = get_registration_lab_test_requests(); # p $registration_lab_tests; # get list of section lab test id's (for inter-converting tests & panels): my $section_lab_test_id = get_section_lab_tests(['FISH', 'Molecular']); # p $section_lab_test_id; # get mapped HILIS4 lab_test.id => NGIS test code: my $ngis_lab_tests = get_ngis_lab_tests(); # p $ngis_lab_tests; #------------------------------------------------------------------------------- my ($sql, @bind) = _get_main_query(); # p $sql; p @bind; my $t0 = [gettimeofday]; my $query = $dbix->query( $sql, @bind ); # get cols from query, except 'private' ones used for evaluations (eg _datetime_screened): my @headers = grep $_ !~ /^_/, $query->columns; # p @headers; my @data = $query->hashes; runtimer('query runtime'); my $xl = Spreadsheet::WriteExcel::Simple->new; $xl->write_bold_row(\@headers); runtimer('commencing data processing'); # get request_ids and DNA/RNA extraction dates into hash map: my $extraction_dates = do { my @request_ids = map $_->{internal_request_id}, @data; get_extraction_dates_for_requests(\@request_ids); }; # io('extraction_dates.txt')->appendln($_) for keys %$extraction_dates; # pre-process data to reconstitute (some) molecular panels: runtimer('reconstituting molecular panels'); @data = reconstitute_molecular_panels(\@data); # p @data; # pre-process data to split (some) FISH panels (too complex to combine with above): runtimer('splitting FISH panels'); @data = split_fish_panels(\@data); # p @data; runtimer('finished pre-processing data'); # process @data: for my $ref (@data) { my $data = process_data($ref); # returns arrayref $xl->write_row($data); } runtimer('completed data processing'); if ($JUST_TESTING) { $xl->save(join '/', $Bin, $filename); } else { my %mail = ( attachment => $xl->data, filename => $filename, subject => $subject, config => $config, ); $tools->send_mail(\%mail, \@data_recipients); } # log all misfits: _do_log(); #=============================================================================== sub runtimer { return unless $JUST_TESTING; say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday]; } sub debug { return unless $JUST_TESTING; say $_[0]; } sub process_data { my $ref = shift; # href # NGIS test code: $ref->{test_id} = get_ngis_test_code($ref); # determine date lab-test requested (replaced by datetime_order_received): # $ref->{datetime_test_requested} = get_test_request_date($ref); # will need to be revived # set datetime_testing_complete to test completion datetime: $ref->{datetime_testing_complete} = $ref->{_datetime_test_completed}; { # set datetime_reporting_complete to later of testing_complete or authorisation: if ( $ref->{_datetime_authorised} ) { my @dates = @{$ref}{ qw/_datetime_authorised _datetime_test_completed/ }; # dates in yyyy-mm-dd format can be handled as strings for determination # of chronology using List::Util::maxstr or could do $dateA gt $dateB: $ref->{datetime_reporting_complete} = maxstr(@dates); # List::Util } else { # not authorised, set to _datetime_test_completed: $ref->{datetime_reporting_complete} = $ref->{_datetime_test_completed} } } =begin # datetime_processing_complete now set to D/RNA extraction date # set datetime_processing_complete to later of authorisation date or test # completion date; value will be null if request not yet authorised: # p $ref->{sample_processed_date}; p $ref->{_datetime_authorised}; if ( $ref->{_datetime_authorised} ) { my @dates = @{$ref}{ qw/_datetime_authorised datetime_testing_complete/ }; # dates in yyyy-mm-dd format can be handled as strings for determination # of chronology using List::Util::maxstr or could do $dateA gt $dateB: $ref->{datetime_processing_complete} = maxstr(@dates); # List::Util } # else sample_completed_date remains null =cut $ref->{datetime_processing_complete} = get_processing_date($ref); # new template wants some cols temporarily NULL (commented out in query now): my @null_fields = qw//; $ref->{$_} = undef for @null_fields; return [ @{$ref}{@headers} ]; } sub _main_query_cols { my @cols = ( 'CONCAT_WS("/", r.request_number, r.year - 2000)|local_sample_id', # 'NULL|local_sample_id_sub', # 'NULL|local_dna_number', # 'NULL|local_dna_number_sub', 'p.id|internal_patient_id', 'r.id|internal_request_id', q!"GeNEQ Leeds (HILIS)"|booking_laboratory!, q!"GeNEQ Leeds (HILIS)"|processing_laboratory!, q!"GeNEQ Leeds (HILIS)"|testing_laboratory!, q!"GeNEQ Leeds (HILIS)"|reporting_laboratory!, # 'NULL|ped_number', # 'p.first_name|patient_first_name', # 'p.last_name|patient_last_name', # 'p.dob|patient_dob', 'p.gender|patient_sex', # 'p.nhs_number', # 'pd.address|patient_address', # 'NULL|patient_district_of_residence', 'NULL|patient_city', # 'pd.post_code|patient_postcode', # 'NULL|patient_country', 'ref.national_code|referrer_ods_code', # 'NULL|referrer_email', # 'NULL|referrer_telephone', # 'NULL|referrer_title', # 'NULL|referrer_first_name', # 'ref.name|referrer_last_name', 'hd.display_name|referrer_specialty', # 'rs.display_name|referring_facility_name', # 'NULL|referrer_address', # 'NULL|referrer_city', # 'NULL|referrer_district', # 'NULL|referrer_postcode', # 'NULL|referrer_county', 'rs.organisation_code|referring_facility_ods_code', # 'NULLIF( pc.unit_number, "UNKNOWN")|unit_number', # NULL if a = b # 'NULL|other_unit_number', # 'IFNULL(rrv.clinical_details, "DiagnosticTest")|referral_reason', # a or b q!"Routine"|test_priority!, 'NULL|test_id', # NGIS test id (eg M80.6) 'lt.field_label|test_name', # 'ls.section_name|lab_section', 'NULL|ngis_id', # NGIS order number ? # 'NULL|datetime_test_requested', # replaced by datetime_order_received # 'lt.test_type|request_comments', # panel or test q!GROUP_CONCAT(DISTINCT s.description SEPARATOR '; ') AS sample_type_received!, 'rsd.specimen_date|datetime_sample_collected', # datetime 'r.created_at|datetime_sample_received', # datetime 'r.created_at|datetime_order_received', # datetime 'NULL|datetime_processing_complete', # set later 'NULL|datetime_testing_complete', # set later 'NULL|datetime_reporting_complete', # set later 'rsd.specimen_quality|sample_condition', # q!MAX( CASE WHEN ao.option_name = 'doi' # THEN 'yes' ELSE 'no' END ) AS infection_risk!, # 'NULL|sample_type_dispatched', # 'IF( rst.volume, "ul", NULL )|sample_units', # 'rst.volume|sample_amount', # q!IF( rst.concentration < 9999, rst.concentration, NULL ) # AS dna_concentration!, # 'NULL|sample_date_sent', # 'rst.vialId|sample_tube_id', # 'sr.plateId|sample_rack_id', # 'NULL|sample_courier', # 'NULL|sample_courier_ref', # 'NULL|sample_processed_date', # renamed datetime_processing_complete # 'NULL|sample_completed_date', # renamed datetime_testing_complete # fields used for data processing only (underscored for omission in @headers): 'lt.id|_lab_test_id', 'lt.test_type|_test_type', 'lt.test_name|_test_name', 'ls.section_name|_section_name', 'd.name|_diagnosis', 'nci.code|_clinical_indication_code', 's2.description|_presentation', 'rrv.diagnosis_id|_diagnosis_id', # q!MAX( CASE WHEN rsv.action = 'screened' THEN rsv.time END ) # AS _datetime_screened!, # not used # q!MAX( CASE WHEN rsv.action = 'reported' THEN rsv.time END ) # AS _datetime_reported!, # not used q!MAX( CASE WHEN rsv.action = 'authorised' THEN rsv.time END ) AS _datetime_authorised!, 'ts.time|_datetime_test_completed', # date lab-test set to complete ); return wantarray ? @cols : \@cols; } sub _get_main_query { my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!; my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month my %date_restriction = $cumulative # if --cumulative Getopt: ? ( '>=', $begin_date->ymd ) # all since ref_date : ( -between => [ # first & last day of ref_date month: $begin_date->ymd, # 1st day $begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day ] ); my $excluded_lab_test_ids = get_excluded_lab_test_ids(); my $cols = _main_query_cols(); # these keep changing my @rels = ( 'requests|r' , q{r.patient_case_id=pc.id} => 'patient_case|pc' , q{pc.patient_id=p.id} => 'patients|p' , q{pc.referral_source_id=rs.id} => 'referral_sources|rs' , q{rs.parent_organisation_id=po.id} => 'parent_organisations|po' , q{r.referrer_department_id=rd.id} => 'referrer_department|rd' , q{rd.hospital_department_code=hd.id} => 'hospital_departments|hd' , q{rd.referrer_id=ref.id} => 'referrers|ref' , q{rsd.request_id=r.id} => 'request_specimen_detail|rsd' , q{rsp.request_id=r.id} => 'request_specimen|rsp' , q{rsp.specimen_id=s.id} => 'specimens|s' , q{ts.request_id=r.id} => 'request_lab_test_status|ts' , q{ts.status_option_id=so.id} => 'lab_test_status_options|so' , q{ts.lab_test_id=lt.id} => 'lab_tests|lt' , q{lt.lab_section_id=ls.id} => 'lab_sections|ls' , q{rsv.request_id=r.id} => 'request_status_view|rsv' , q{ris.request_id=r.id} => 'request_initial_screen|ris' , q{ris.screen_id=s2.id} => 'screens|s2' , # left joins: q{=>rrv.request_id=r.id} => 'request_report_view|rrv' , q{=>rrv.diagnosis_id=d.id} => 'diagnoses|d' , q{=>nd.diagnosis_id=d.id} => 'ngis_diagnosis|nd' , q{=>nd.ngis_indication_id=nci.id} => 'ngis_clinical_indications|nci', q{=>ro.request_id=r.id} => 'request_option|ro' , q{=>ro.option_id=ao.id} => 'additional_options|ao' , q{=>pd.patient_id=p.id} => 'patient_demographics|pd' , q{=>rst.request_id=r.id} => 'request_storage|rst' , q{=>rst.rack_id=sr.id} => 'storage_racks|sr' , q{=>rt.request_id=r.id} => 'request_trial|rt' , ); my %where = ( -and => [ # repeated elements (eg parent_code), so requires arrayref 'rt.request_id' => undef, # not in clinical trial 'so.description' => 'complete', # lab-test status 'ls.section_name' => { -in => \@lab_sections }, 'rs.organisation_code' => { -not_rlike => '^NT' }, # private hospital 'po.parent_code' => { -not_rlike => '^(S|Z)' }, # scotland/ireland 'po.parent_code' => { -not_in => \@excluded_parent_codes }, 'DATE(ts.time)' => \%date_restriction, 'lt.test_name' => { -not_rlike => $excluded_test_name_re }, 'lt.id' => { -not_in => $excluded_lab_test_ids }, # to restrict to specific request id's: # 'r.id' => { -in => [395682] }, ], ); my @args = ( -columns => $cols, -from => [ -join => @rels ], -where => \%where, -group_by => [ 'lt.id', 'r.id' ], -having => { $is_private => undef }, # not private patient -order_by => 'r.id', #-limit => 100, #-offset => 100, ); # p @args; # need to supply our own injection_guard to override built-in sql injection # attack prevention which detects ';' in GROUP_CONCAT, but we don't need one # here as we have no user input: my $sqla = SQL::Abstract::More->new( injection_guard => qr/^$/ ); my ($sql, @bind) = $sqla->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; return ($sql, @bind); } sub get_excluded_lab_test_ids { my @test_ids; my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' ); my %args = ( -columns => 'lt.id', -from => [ -join => @rels ], ); while ( my($section, $ref) = each %{$excluded_lab_tests} ) { $args{'-where'} = { 'ls.section_name' => $section, 'lt.field_label' => { -in => $ref }, }; my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; my @ids = $dbix->query($sql, @bind)->column; # ref push @test_ids, @ids; } # p @test_ids; return wantarray ? @test_ids : \@test_ids; } sub get_extraction_dates_for_requests { my $request_ids = shift; my @cols = ( 'ts.request_id', q/IF(lt.test_name='cd138_dna','CD138DNA',LEFT(lt.field_label,3))|type/, 'ts.time', ); my @rels = ( 'request_lab_test_status|ts', 'ts.lab_test_id=lt.id' => 'lab_tests|lt' , 'ts.status_option_id=so.id' => 'lab_test_status_options|so', ); my @extraction_tests = qw( dna_extraction rna_extraction cd138_dna ); my %where = ( 'so.description' => 'complete', 'lt.test_name' => { -in => \@extraction_tests }, 'ts.request_id' => { -in => $request_ids }, ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; my $query = $dbix->query( $sql, @bind ); my %h; while ( my $ref = $query->hash ) { my $request_id = $ref->{request_id}; my $datetime = $ref->{time}; my $type = $ref->{type}; $h{$request_id}{$type} = $datetime; } return \%h; } sub get_processing_date { my $ref = shift; # FISH tests not done on D/RNA extraction so can't get a processing date: return if $ref->{_section_name} eq 'FISH'; my $test_name = $ref->{_test_name}; # p $test_name; my $lab_num = $ref->{local_sample_id}; my $req_id = $ref->{internal_request_id}; # p $req_id; # get extraction dates for DNA and/or RNA: my $extraction = $extraction_dates->{$req_id}; # p $extraction; debug("no extraction for $lab_num $test_name") and return unless $extraction; # get expected material, RNA if test_name in %rna_tests, otherwise assume # DNA, except a small minority of tests done on CD138-selected DNA (see below): my $material = $rna_tests->{$test_name} ? 'RNA' : 'DNA'; # get extraction date for test material: my $date = $extraction->{$material}; # some tests (eg MLPA myeloma) are done on CD138-selected DNA - # key = CD138DNA in %extraction_dates: $date ||= $extraction->{CD138DNA}; # or give up - no extraction date available: debug("no extraction date for $lab_num $test_name $material") and return unless $date; return $date; } =begin # not used - replaced by datetime_order_received sub get_test_request_date { my $ref = shift; # p $ref; my $request_id = $ref->{internal_request_id}; my $registered = $ref->{datetime_sample_received}; my $test_name = $ref->{test_name}; my $screened = $ref->{_datetime_screened}; # use manual request date if exists, or auto-request date, or date of screening: my $manual_request_date = get_manual_test_request($request_id, $test_name); # auto-requested by sample-type, therefore date = registration: my $auto_request_date = $registration_lab_tests->{$test_name} ? $registered : 0; # zero OK as will be tested for truth below # p [$manual_request_date, $auto_request_date, $screened]; return $manual_request_date || $auto_request_date || $screened; } =cut =begin # not used: sub get_registration_lab_test_requests { my @cols = ( 'DISTINCT t2.field_label', 1 ); my @rels = ('specimen_lab_test|t1', 't1.lab_test_id=t2.id', 'lab_tests|t2'); my @args = ( -columns => \@cols, -from => [ -join => @rels ], ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; return $dbix->query($sql, @bind)->map; } =cut sub get_rna_tests { my $skipped_tests = shift; my @rels = ( 'linked_lab_test|llt' , 'llt.parent_test_id=lt1.id' => 'lab_tests|lt1' , 'llt.linked_test_id=lt2.id' => 'lab_tests|lt2' , ); my @args = ( -columns => [ 'lt1.test_name', 1 ], -from => [ -join => @rels ], -where => { 'lt1.field_label' => { -not_in => $skipped_tests }, 'lt2.test_name' => 'rna_extraction', }, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; my $map = $dbix->query( $sql, @bind )->map; # add rna_extraction in case extraction tests are required in data return: $map->{rna_extraction} = 1; return $map; } =begin # called from get_test_request_date(), not used sub get_manual_test_request { my ($request_id, $test_name) = @_; my @args = ( -columns => [ 'time' ], -from => [ 'request_lab_test_history' ], -where => { request_id => $request_id, action => { -rlike => '^(auto-)?requested( linked test)? ' . $test_name }, }, -order_by => [ '-time' ], # ORDER BY time DESC -limit => 1, # returns most recent only if >1 ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; # $dbix->dump_query($sql, @bind) if $query_output; # exit; return $dbix->query($sql, @bind)->value; } =cut sub get_ngis_test_code { my $ref = shift; # p $ref if $ref->{test_name} =~ /Burkitt mutation/; my $presentation = $ref->{_presentation}; my $diagnosis_id = $ref->{_diagnosis_id}; my $lab_test_id = $ref->{_lab_test_id}; my $lab_section = $ref->{_section_name}; my $test_name = $ref->{test_name}; # say $test_name; my $test_type = $ref->{_test_type}; my $diagnosis = $ref->{_diagnosis} || ''; my $lab_num = $ref->{local_sample_id}; my $presentation_diagnosis = join '::', $presentation, $diagnosis || '[NOT REPORTED]'; # p $presentation_diagnosis; # get NGIS code(s) for this lab-test: my $ngis_test_code = $ngis_lab_tests->{$lab_test_id}; # p $ngis_test_code; # aref unless ($ngis_test_code) { my $title =join ' :: ', $test_name, $test_type, $lab_section; $STASH{no_ngis_code}{$title}++; # debug("$lab_num: no NGIS id for $test_name"); return $test_name !~ /[DR]NA extraction|CD138\+ DNA/ # return undef if extraction ? $presentation_diagnosis : undef; } # return NGIS test code if it's the only one (even if not supported clinical # indication; TODO: this might not be suitable, might need to consider diagnosis): return $ngis_test_code->[0] if scalar @$ngis_test_code == 1; my $all_test_codes = join ', ', @$ngis_test_code; # for log # now have multiple NGIS id's for lab-test # common message for debugging: my $msg = qq!$lab_num: multiple NGIS codes for '$test_name'!; # vars for try_presentation(): my %vars_for_presentation = ( data => $ref, ngis_code => $ngis_test_code ); # now need to examine diagnosis: if (! $diagnosis_id) { if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) { # debug("found $ngis_code from $presentation"); return $ngis_code; } # io($log)->appendln("$msg and request is not reported"); push @{ $STASH{unreported} }, [ $lab_num, $test_name, $presentation ]; return $presentation_diagnosis; } # get clinical indication for diagnosis (eg AML NOS = M80) my $indication = $ref->{_clinical_indication_code}; # p $indication; if (! $indication) { # debug('no clinical indication'); if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) { # debug("found $ngis_code from $presentation"); return $ngis_code; } #io($log)->appendln("$msg and '$diagnosis' has no clinical indication id" # . " [presentation: $presentation]"); push @{ $STASH{no_clinical_indication} }, [ $lab_num, $test_name, $presentation, $diagnosis ]; return $presentation_diagnosis; } # p $indication; p $ngis_ids; # return NGIS code from list if has a matching clinical indication: for my $ngis_code (@$ngis_test_code) { # warn $ngis_code; # debug("$lab_num: $indication -> $ngis_code") if $ngis_code =~ /^$indication/; return $ngis_code if $ngis_code =~ /^$indication/; } # last resort is to try to find NGIS code from presentation: if ( my $ngis_code = try_presentation(\%vars_for_presentation) ) { # debug("found $ngis_code from $presentation"); return $ngis_code; } # can't find an NGIS code for lab-test from presentation or diagnosis: #io($log)->appendln("$msg and diagnosis indication $indication [$diagnosis] " # . "did not match any available NGIS test codes [presentation: $presentation]"); # debug("no NGIS id available for $test_name"); push @{ $STASH{no_available_ngis_code} }, [ $lab_num, $test_name, $all_test_codes, $diagnosis, $indication, $presentation ]; # return undef; return $presentation_diagnosis; } sub try_presentation { # try to look for a loose match with presentation term: my $args = shift; my $presentation = $args->{data}->{_presentation}; my $request_id = $args->{data}->{internal_request_id}; my $ngis_codes = $args->{ngis_code}; my $test_name = $args->{data}->{test_name}; # p $presentation; p $ngis_codes; p $test_name; # get any previous screening terms from history file: my @all_screens = get_request_history($request_id); push @all_screens, $presentation; # p @all_screens; for my $screen (@all_screens) { for my $type ( keys %presentation_types ) { # p $type; # eg AML, MDS if ( $screen =~ /$type/ ) { # eg Suspected MDS =~ /MDS/ # debug("$screen matches $type"); for my $code (@$ngis_codes) { # eg M84.2 # p $code; p $presentation_types{$type}; if ( $code =~ /^$presentation_types{$type}/ ) { # eg M84.2 =~ /^M84/ # debug("$code matches $presentation_types{$type}"); return $code; } } } } } { # if we get this far, no other match possible, force NGIS code if test # configured in %forced_ngis_codes: for my $test ( keys %forced_ngis_codes ) { return $forced_ngis_codes{$test} if $test_name eq $test; } # BCR-ABL p210 RQ or BCR-ABL multiplex: return $forced_ngis_codes{'BCR-ABL-tests'} if $test_name =~ /BCR-ABL/; } return undef; } sub get_request_history { my $request_id = shift; # p $request_id; my $query = $dbix->select('request_history', 'action', { request_id => $request_id, action => { -like => 'updated initial screen entry %' } } ); my @screens; while ( my $action = $query->value ) { # p $action; if ( $action =~ /updated initial screen entry (.*)/ ) { push @screens, $1; } } return wantarray ? @screens : \@screens; } sub get_ngis_lab_tests { my $q = $dbix->select('ngis_lab_test', [qw/lab_test_id ngis_test_code/]); my %h; while ( my $ref = $q->array ) { # p $ref; my $lab_test_id = $ref->[0]; push @{ $h{$lab_test_id} }, $ref->[1]; } return \%h; } sub split_fish_panels { # only some FISH panels (those without an NGIS map): my $data = shift; # p $data; my $required_fish_panels = get_fish_panel_names_without_ngis_codes(); # p $required_fish_panels; my @fish_panel_lab_tests = @{ $panel_lab_test->{FISH} }; my %non_ngis_fish_panels; # skip panels from $fish_panel_lab_test with NGIS codes: for my $ref (@fish_panel_lab_tests) { # AoA my $panel_name = $ref->[0]; # p $panel_name; next unless grep { $panel_name eq $_ } @$required_fish_panels; # debug("$panel_name in required_fish_panels"); $non_ngis_fish_panels{$panel_name} = $ref->[1]; } # p %non_ngis_fish_panels; # coderef for substituting test names (eg "DLBCL + FL" panel -> "DAKO MYC") my $substitute_values = sub { my ($row, $new_test_name) = @_; $row->{test_name} = $new_test_name; # also need to change lab_test_id for finding NGIS test code: $row->{_lab_test_id} = $section_lab_test_id->{FISH}->{$new_test_name}; $row->{_test_type} = 'test'; # for log in case it's unmapped }; ROW: for my $row (@$data) { my $test_name = $row->{test_name}; my $lab_num = $row->{local_sample_id}; # if test name matches one of FISH panels without an NGIS code: if ( grep $test_name eq $_, keys %non_ngis_fish_panels ) { # get FISH panel test names: my @panel_tests = @{ $non_ngis_fish_panels{$test_name} }; { # change test_name to 1st element of $panel_tests my $new_test_name = shift @panel_tests; debug("$lab_num: changing $test_name to $new_test_name"); &$substitute_values($row,$new_test_name); } # now clone $row for each remaining element in @panel_tests: TEST: for my $t (@panel_tests) { my $new = clone $row; # repeat above substitutions: debug("$lab_num: adding new row with test-name $t"); &$substitute_values($new,$t); push @$data, $new; } } } # re-sort by lab_number (@data now out of sequence after pushing new rows) my @new = sort { $a->{internal_request_id} <=> $b->{internal_request_id} } @data; return wantarray ? @new : \@new; } sub get_fish_panel_names_without_ngis_codes { my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id' => 'lab_sections|ls', '=>nlt.lab_test_id=lt.id' => 'ngis_lab_test|nlt' ); my %where = ( 'ls.section_name' => 'FISH', 'lt.test_type' => 'panel', 'lt.field_label' => { -not_in => $excluded_lab_tests->{FISH} }, 'nlt.lab_test_id' => undef, ); my @args = ( -columns => 'lt.field_label', -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; return $dbix->query( $sql, @bind )->column; } sub reconstitute_molecular_panels { my $data = shift; # p $data; # new data file: my @new; # keep track of request_id: my $last_request_id = 0; # holder for molecular tests data: my @molecular_tests = (); # will be destroyed and recreated after each request ROW: for my $row (@$data) { my $lab_section = $row->{_section_name}; my $request_id = $row->{internal_request_id}; # if new request, handle molecular tests data, if any: if ( @molecular_tests && $request_id != $last_request_id ) { $log->info("molecular tests for $last_request_id:", { set => [ map $_->{test_name}, @molecular_tests ] } ); my @rows = molecular_tests_to_panels(\@molecular_tests); # p @rows; push @new, @rows; @molecular_tests = (); # reset for next request } push @new, $row and next ROW if $lab_section ne 'Molecular'; # have molecular data row: push @molecular_tests, $row; # set last_request_id to current request_id: $last_request_id = $request_id; } # p %tests; return wantarray ? @new : \@new; } sub molecular_tests_to_panels { my $ref = shift; # AoH's my @molecular_panel_lab_tests = @{ $panel_lab_test->{Molecular} }; my @all_molecular_tests = map $_->{test_name}, @$ref; # say 'test_names:'; p @all_molecular_tests; # panels ordered in size of array, largest 1st to prevent matches with # smaller panels having common members (eg AML 60+ & AML under-60): PANEL: for my $t (@molecular_panel_lab_tests) { # eg AML 60+ => [NPM1, FLT3 ITD] my ($panel_name, $panel_tests) = @$t; # p @$panel_tests; # don't need ordered list, use -u flag for speed boost: my $lc = List::Compare->new('-u', $panel_tests, \@all_molecular_tests); # if this panel list is a subset of all_molecular_tests: if ( $lc->is_LsubsetR ) { # is left-a-subset-of-right ? $log->info("$panel_name panel found in molecular set:", { panel_tests => $panel_tests } ); my @set = @$panel_tests; # localise for element substitutions my %seen = (); # reset test name stash for this panel # remove 1st element in @set into a variable: my $first_element_of_set = shift @set; # debug("first_element_of_set: $first_element_of_set"); # for test name of 1st entry in $ref matching 1st element of @set, # change it to the matched panel_name & flag the rest for deletion: DATA: for my $d (@$ref) { # p $d; # data href next DATA if $d->{DELETE_ME}; # already flagged for deletion my $test_name = $d->{test_name}; # p $test_name; my $lab_num = $d->{local_sample_id}; # debug("$lab_num: testing whether $test_name eq $first_element_of_set"); if ( $test_name eq $first_element_of_set ) { $log->info("changing $test_name to $panel_name"); debug("$lab_num: changing $test_name to $panel_name"); $d->{test_name} = $panel_name; $d->{_test_type} = 'panel'; # for log in case it's unmapped $d->{_lab_test_id} # for finding NGIS test code: = $section_lab_test_id->{Molecular}->{$panel_name}; } elsif ( grep $test_name eq $_, @set ) { # set a flag for later (only once per test in case it's # re-requested or is part of another panel): next DATA if $seen{$test_name}++; $log->info("$test_name is in $panel_name set, flagging for deletion"); # debug("$test_name is in $panel_name set, flagging for deletion") $d->{DELETE_ME}++; } } } } # p $ref; $log->info('=' x 30); # return original hashrefs not flagged for deletion: my @data = map $_, grep {! $_->{DELETE_ME} } @$ref; # say 'new rows'; p @data; return wantarray ? @data : \@data; } sub get_section_lab_tests { my $lab_sections = shift; # aref my @rels = ( 'lab_tests|lt', 'lt.lab_section_id=ls.id', 'lab_sections|ls' ); my %args = ( -columns => [ qw/lt.field_label lt.id/ ], -from => [ -join => @rels ], -where => { 'ls.section_name' => undef }, # defined in block ); # p @args; my %h; for my $section (@$lab_sections) { $args{'-where'}{'ls.section_name'} = $section; my ($sql, @bind) = SQL::Abstract::More->new->select(%args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; $h{$section} = $dbix->query( $sql, @bind )->map; } # p \%h; return \%h; } sub get_panel_lab_tests { my $lab_section = shift; # aref my @cols = ( 'lt1.field_label|panel', 'lt2.field_label|test', ); my @rels = ( 'panel_lab_test|plt' , 'plt.panel_test_id=lt1.id' => 'lab_tests|lt1' , 'plt.lab_test_id=lt2.id' => 'lab_tests|lt2' , 'lt1.lab_section_id=ls.id' => 'lab_sections|ls' , ); my %panel_tests; for my $section (@$lab_section) { # need to *locally* exclude DNA extraction, if not already excluded: my @local_excluded_tests = @{ $excluded_lab_tests->{$section} }; push @local_excluded_tests, 'DNA extraction' unless grep { $_ eq 'DNA extraction' } @local_excluded_tests; my %where = ( 'lt1.field_label' => { -not_in => \@local_excluded_tests }, 'lt2.field_label' => { -not_in => \@local_excluded_tests }, 'ls.section_name' => $section, 'lt1.is_active' => 'yes', ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; my $query = $dbix->query( $sql, @bind ); my %h; while ( my $ref = $query->hash ) { my $panel = $ref->{panel}; push @{ $h{$panel } }, $ref->{test}; } # p %h; # re-arrange in order of size of panel, largest 1st: my @data = map [ +($_ => $h{$_}) ], sort { scalar @{ $h{$b} } <=> scalar @{ $h{$a} } } keys %h; # p @data; $panel_tests{$section} = \@data; } return \%panel_tests; } sub test_name_order { # p $a; p $b; my ($req_num_a, $yr_a) = split '/', $a->[0]; my ($req_num_b, $yr_b) = split '/', $b->[0]; my $test_name_a = $a->[1]; my $test_name_b = $b->[1]; return $test_name_a cmp $test_name_b || $yr_a <=> $yr_b || $req_num_a <=> $req_num_b; } sub _do_log { # p %STASH; my $no_ngis_code = $STASH{no_ngis_code}; my $unreported = $STASH{unreported}; my $no_clinical_indication = $STASH{no_clinical_indication}; my $no_available_ngis_code = $STASH{no_available_ngis_code}; my $filename = sprintf '%s/geneq_data_extract_log_%s.xlsx', $Bin . '/../../../logs/geneq', lc $ref_date->strftime('%b_%Y'); my $xl = Local::WriteExcel->new( filename => $filename ); # p $xl; if ($no_clinical_indication) { $xl->worksheet_name('no clinical indication'); my @cols = qw( lab_num test_name presentation diagnosis ); $xl->write_bold_row(\@cols); $xl->write_row($_) for @$no_clinical_indication; } if ($no_available_ngis_code) { $xl->worksheet_name('no available ngis test-code'); my @cols = qw( lab_num test_name ngis_codes diagnosis clinical_indication presentation ); $xl->write_bold_row(\@cols); $xl->write_row($_) for @$no_available_ngis_code; } if ($unreported) { $xl->worksheet_name('unreported'); $xl->write_bold_row([ qw/lab_num test_name presentation/] ); $xl->write_row($_) for @$unreported; } if ($no_ngis_code) { # p $no_ngis_code; $xl->worksheet_name('no ngis test-code'); $xl->write_bold_row([ qw/test_name test_type lab_section count/ ] ); $xl->write_row([ (split ' :: ', $_ ), $no_ngis_code->{$_} ]) for sort keys %{ $no_ngis_code }; } if ($JUST_TESTING) { $xl->save(); } else { my $subject = 'GeNEQ data extraction log ' . $ref_date->strftime('%b %Y'); # p $subject; # capture filename from its full path: $filename =~ /(geneq_data_extract_log_.*)/; # p $1; my %mail = ( attachment => $xl->data, filename => $1, # captured from above subject => $subject, config => $config, ); $tools->send_mail(\%mail, \@log_recipients); } { # text file: # log summary data to $logfile, override any previous adapter settings Log::Any::Adapter->set(File => $logfile); $log->info('DIAGNOSIS & PRESENTATION WITH NO CLINICAL INDICATION'); $log->info(join ' :: ', @$_) for sort test_name_order @$no_clinical_indication; $log->info('DIAGNOSIS WITH NO AVAILABLE NGIS CODE & PRESENTATION ' . 'WITH NO CLINICAL INDICATION'); $log->info(join ' :: ', @$_) for sort test_name_order @$no_available_ngis_code; $log->info('UNREPORTED REQUESTS'); $log->info(join ' :: ', @$_) for @$unreported; $log->info('LAB TESTS WITH NO NGIS ID'); $log->info(join ':: ', $_, $no_ngis_code->{$_}) for sort keys %{ $no_ngis_code }; } }