#!/usr/bin/env perl =begin ------------------------------------------------------------------------- DNA/RNA extractions for NGIS data feed runs at 6am on 1st & 3rd Friday of month via cron: 0 6 1-7 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ... 0 6 15-21 * * [ "$(date '+\%a')" = "Fri" ] && $MULTI/$0 ... manual run: $0 [-m , -t , -q , --cumulative ] =cut #------------------------------------------------------------------------------- #=============================================================================== my @recipients = qw( paul.mcintosh john.fraser raj ); #=============================================================================== my $query_output = 0; # --query|q - output sql queries to console my $cumulative = 0; # --cumulative - all requests since 1st of month of ref_date my $duration = 1; # --month|m - default unless specified in command-line opts my $testing = 0; # --testing|t - saves file locally, doesn't email use Getopt::Long; GetOptions ( "months|m=i" => \$duration, # int "testing|t" => \$testing, # flag "query|q" => \$query_output, # flag "cumulative" => \$cumulative, # flag ); # warn $duration; warn $cumulative; exit; use strict; use warnings; use feature 'say'; my $JUST_TESTING = $testing || 0; use lib '/home/raj/perl5/lib/perl5'; use FindBin qw($Bin); # warn $Bin; use Data::Printer; use SQL::Abstract::More; use Spreadsheet::WriteExcel::Simple; use Time::HiRes qw(gettimeofday tv_interval); use lib $Bin . '/../../../lib'; use LIMS::Local::ScriptHelpers; use lib '/home/raj/perl-lib'; use Local::DB; my $tools = LIMS::Local::ScriptHelpers->new(); $tools->test_only($JUST_TESTING); my $config = $tools->config(); my $today = $tools->time_now; my $dbix = Local::DB->dbix({ dbname => 'hilis4' }); $dbix->lc_columns = 0; # preserve mixed case on col names $Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col) my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date; my $filename = sprintf 'YNEGLH_HMDS_xna_%s.xls', $ref_date->strftime('%b_%Y'); # warn $filename; my $subject = sprintf 'YNEGLH HMDS extractions %s', $ref_date->strftime('%b %Y'); # only want DNA & RNA extractions: my @lab_test_names = qw(dna_extraction rna_extraction cd138_dna cd19_dna); # this needs to go in config file (shared with data_extract script): my @excluded_parent_codes = ( 'RWM', # Cardiff 'RVA', # Carmarthen 'RT8', # Conwy 'RVF', # Gwent 'RT9', # NE Wales 'RT7', # NW Wales 'RVC', # Swansea 'RQF', # Velidre, Wales '8EQ15', # Drs Lab 'MSDMDH', # Mater Dei 'X99999', 'V81999' # unknowns ); my ($sql, @bind) = _get_main_query(); # p $sql; p @bind; my $t0 = [gettimeofday]; my $query = $dbix->query( $sql, @bind ); # get cols from query, except 'private' ones used for evaluations (eg _specimen): my @headers = grep $_ !~ /^_/, $query->columns; # p @headers; my @data = $query->hashes; runtimer('query runtime'); my $xl = Spreadsheet::WriteExcel::Simple->new; $xl->write_bold_row(\@headers); runtimer('commencing data processing'); # process @data: for my $ref (@data) { my $data = process_data($ref); # returns arrayref $xl->write_row($data); } runtimer('completed data processing'); if ($JUST_TESTING) { $xl->save(join '/', $Bin, $filename); } else { my %mail = ( attachment => $xl->data, filename => $filename, subject => $subject, config => $config, ); $tools->send_mail(\%mail, \@recipients); } sub process_data { my $data = shift; # convert _specimen to sample_type_received: my $specimen = $data->{_specimen}; # p $sample_type; $data->{sample_type_received} = convert_sample_type($specimen); return [ @{$data}{@headers} ]; } sub convert_sample_type { my $specimen = shift; if ( $specimen =~ /blood/ ) { # p $specimen; return 'Blood'; } elsif ( $specimen =~ /bone marrow/ ) { # p $specimen; return 'Bone marrow'; } elsif ( $specimen =~ /block/ ) { # p $specimen; return 'Paraffin section'; } elsif ( $specimen =~ /fixed/ ) { # p $specimen; # includes unfixed return 'Solid tumour (cancer)'; } elsif ( $specimen =~ /slide/ ) { # p $specimen; return 'Archived sample'; } # p $specimen; # default: return 'Others (tissues etc)'; } sub _get_main_query { my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!; my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month my %date_restriction = $cumulative # if --cumulative Getopt: ? ( '>=', $begin_date->ymd ) # all since ref_date : ( -between => [ # first & last day of ref_date month: $begin_date->ymd, # 1st day $begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day ] ); my @cols = ( 'CONCAT_WS("/", r.request_number, r.year - 2000)|local_sample_id', q!"YNEGLH Leeds HMDS"|booking_laboratory!, q!"YNEGLH Leeds HMDS"|processing_laboratory!, 'NULL|sample_type_received', # defined in convert_sample_type() 'r.created_at|datetime_sample_received', 'ts.time|datetime_processing_complete', q!IF( lt.field_label RLIKE 'DNA', 'DNA', 'RNA' )|extraction_type!, q!"Success"|extraction_status!, # we don't record outcomes # private - for data processing only: 's.description|_specimen', ); my @rels = ( 'requests|r' , q{r.patient_case_id=pc.id} => 'patient_case|pc' , q{pc.referral_source_id=rs.id} => 'referral_sources|rs' , q{rs.parent_organisation_id=po.id} => 'parent_organisations|po' , q{rsp.request_id=r.id} => 'request_specimen|rsp' , q{rsp.specimen_id=s.id} => 'specimens|s' , q{ts.request_id=r.id} => 'request_lab_test_status|ts' , q{ts.status_option_id=so.id} => 'lab_test_status_options|so' , q{ts.lab_test_id=lt.id} => 'lab_tests|lt' , # left joins: q{=>rt.request_id=r.id} => 'request_trial|rt' , q{=>ro.request_id=r.id} => 'request_option|ro' , q{=>ro.option_id=ao.id} => 'additional_options|ao' , ); my %where = ( -and => [ # repeated elements (eg parent_code), so requires arrayref 'rt.request_id' => undef, # not in clinical trial 'so.description' => 'complete', # lab-test status 'rs.organisation_code' => { -not_rlike => '^NT' }, # private hospital 'po.parent_code' => { -not_rlike => '^(S|Z)' }, # scotland/ireland 'po.parent_code' => { -not_in => \@excluded_parent_codes }, 'DATE(r.created_at)' => \%date_restriction, 'lt.test_name' => { -in => \@lab_test_names }, # to restrict to specific request id's: # 'r.id' => { -in => [369247,369248] }, ], ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, -group_by => [ 'r.id', 'lt.id' ], -having => { $is_private => undef }, # not private patient -order_by => 'r.id', #-limit => 100, #-offset => 100, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $query_output; # exit; return ($sql, @bind); } sub runtimer { return unless $JUST_TESTING; say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday]; }