#!/usr/bin/env perl
=begin -------------------------------------------------------------------------
DNA/RNA extractions for NGIS data feed
=cut
#-------------------------------------------------------------------------------
#==============================================================================
my $duration = 1; # month; default unless specified in command-line opts
#==============================================================================
my $testing = # --testing|t - nothing yet
my $query_output = # --query|q - output sql queries to console
my $cumulative = 0; # --cumulative - all requests since 1st of month of ref_date
use Getopt::Long;
GetOptions (
"months|m=i" => \$duration, # int
"testing|t" => \$testing, # flag
"query|q" => \$query_output, # flag
"cumulative" => \$cumulative, # flag
); # warn $duration; warn $cumulative; exit;
use strict;
use warnings;
use feature 'say';
my $JUST_TESTING = $testing || 0;
use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;
use Data::Printer;
use SQL::Abstract::More;
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);
use lib $Bin . '/../../lib';
use LIMS::Local::ScriptHelpers;
use lib '/home/raj/perl-lib';
use Local::DB;
my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);
my $today = $tools->time_now;
my $dbix = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names
$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)
my $filename = $Bin.'/geneq_xna_extractions.xls';
my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date;
# only want DNA & RNA extractions:
my @lab_test_names = qw(dna_extraction rna_extraction cd138_dna);
# this needs to go in config file (shared with data_extract script):
my @excluded_parent_codes = (
'RWM', # Cardiff
'RVA', # Carmarthen
'RT8', # Conwy
'RVF', # Gwent
'RT9', # NE Wales
'RT7', # NW Wales
'RVC', # Swansea
'RQF', # Velidre, Wales
'8EQ15', # Drs Lab
'MSDMDH', # Mater Dei
'X99999', 'V81999' # unknowns
);
my ($sql, @bind) = _get_main_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];
my $query = $dbix->query( $sql, @bind );
# get cols from query, except 'private' ones used for evaluations (eg _specimen):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');
my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);
runtimer('commencing data processing');
# process @data:
for my $ref (@data) {
my $data = process_data($ref); # returns arrayref
$xl->write_row($data);
}
runtimer('completed data processing');
$xl->save($filename);
sub process_data {
my $data = shift;
# convert _specimen to sample_type_received:
my $specimen = $data->{_specimen}; # p $sample_type;
$data->{sample_type_received} = convert_sample_type($specimen);
return [ @{$data}{@headers} ];
}
sub convert_sample_type {
my $specimen = shift;
if ( $specimen =~ /blood/ ) { # p $specimen;
return 'Blood';
}
elsif ( $specimen =~ /bone marrow/ ) { # p $specimen;
return 'Bone marrow';
}
elsif ( $specimen =~ /block/ ) { # p $specimen;
return 'Paraffin section';
}
elsif ( $specimen =~ /fixed/ ) { # p $specimen; # includes unfixed
return 'Solid tumour (cancer)';
}
elsif ( $specimen =~ /slide/ ) { # p $specimen;
return 'Archived sample';
} # p $specimen;
# default:
return 'Others (tissues etc)';
}
sub _get_main_query {
my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;
my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month
my %date_restriction = $cumulative # if --cumulative Getopt:
? ( '>=', $begin_date->ymd ) # all since ref_date
: ( -between => [ # first & last day of ref_date month:
$begin_date->ymd, # 1st day
$begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day
] );
my @cols = (
'CONCAT_WS("/", r.request_number, r.year - 2000)|local_sample_id',
q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
'NULL|sample_type_received', # defined in convert_sample_type()
'r.created_at|datetime_sample_received',
'ts.time|datetime_processing_complete',
q!IF( lt.field_label RLIKE 'DNA', 'DNA', 'RNA' )|extraction_type!,
q!"Success"|extraction_status!, # we don't record outcomes
# private - for data processing only:
's.description|_specimen',
);
my @rels = ( 'requests|r' ,
q{r.patient_case_id=pc.id} => 'patient_case|pc' ,
q{pc.referral_source_id=rs.id} => 'referral_sources|rs' ,
q{rs.parent_organisation_id=po.id} => 'parent_organisations|po' ,
q{rsp.request_id=r.id} => 'request_specimen|rsp' ,
q{rsp.specimen_id=s.id} => 'specimens|s' ,
q{ts.request_id=r.id} => 'request_lab_test_status|ts' ,
q{ts.status_option_id=so.id} => 'lab_test_status_options|so' ,
q{ts.lab_test_id=lt.id} => 'lab_tests|lt' ,
# left joins:
q{=>rt.request_id=r.id} => 'request_trial|rt' ,
q{=>ro.request_id=r.id} => 'request_option|ro' ,
q{=>ro.option_id=ao.id} => 'additional_options|ao' ,
);
my %where = (
-and => [ # repeated elements (eg parent_code), so requires arrayref
'rt.request_id' => undef, # not in clinical trial
'so.description' => 'complete', # lab-test status
'rs.organisation_code' => { -not_rlike => '^NT' }, # private hospital
'po.parent_code' => { -not_rlike => '^(S|Z)' }, # scotland/ireland
'po.parent_code' => { -not_in => \@excluded_parent_codes },
'DATE(r.created_at)' => \%date_restriction,
'lt.test_name' => { -in => \@lab_test_names },
# to restrict to specific request id's:
# 'r.id' => { -in => [369247,369248] },
],
);
my @args = (
-columns => \@cols,
-from => [ -join => @rels ],
-where => \%where,
-group_by => [ 'r.id', 'lt.id' ],
-having => { $is_private => undef }, # not private patient
-order_by => 'r.id',
#-limit => 100,
#-offset => 100,
); # p @args;
my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
$dbix->dump_query($sql, @bind) if $query_output; # exit;
return ($sql, @bind);
}
sub runtimer { say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday] }