P3X GitList

#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
DNA/RNA extractions for NGIS data feed
=cut
#-------------------------------------------------------------------------------

#==============================================================================
my $duration = 1; # month; default unless specified in command-line opts
#==============================================================================

my $testing      =    # --testing|t - nothing yet
my $query_output =    # --query|q - output sql queries to console
my $cumulative   = 0; # --cumulative - all requests since 1st of month of ref_date

use Getopt::Long;
GetOptions (
    "months|m=i" => \$duration,       # int
    "testing|t"  => \$testing,        # flag
    "query|q"    => \$query_output,   # flag
    "cumulative" => \$cumulative,     # flag
); # warn $duration; warn $cumulative; exit;

use strict;
use warnings;
use feature 'say';

my $JUST_TESTING = $testing || 0;

use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;

use Data::Printer;
use SQL::Abstract::More;
use Spreadsheet::WriteExcel::Simple;
use Time::HiRes qw(gettimeofday tv_interval);

use lib $Bin . '/../../lib';
use LIMS::Local::ScriptHelpers;

use lib '/home/raj/perl-lib';
use Local::DB;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

my $today = $tools->time_now;
my $dbix  = Local::DB->dbix({ dbname => 'hilis4' });
$dbix->lc_columns = 0; # preserve mixed case on col names

$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # or will defeat DATE(col)

my $filename = $Bin.'/geneq_xna_extractions.xls';
my $ref_date = $today->clone->subtract( months => $duration ); # warn $ref_date;

# only want DNA & RNA extractions:
my @lab_test_names = qw(dna_extraction rna_extraction cd138_dna);

# this needs to go in config file (shared with data_extract script):
my @excluded_parent_codes = (
	'RWM', 	  # Cardiff
	'RVA', 	  # Carmarthen
	'RT8', 	  # Conwy
	'RVF', 	  # Gwent
	'RT9', 	  # NE Wales
	'RT7', 	  # NW Wales
	'RVC', 	  # Swansea
	'RQF', 	  # Velidre, Wales
	'8EQ15',  # Drs Lab
	'MSDMDH', # Mater Dei
	'X99999', 'V81999' # unknowns
);

my ($sql, @bind) = _get_main_query(); # p $sql; p @bind;
my $t0 = [gettimeofday];

my $query = $dbix->query( $sql, @bind );

# get cols from query, except 'private' ones used for evaluations (eg _specimen):
my @headers = grep $_ !~ /^_/, $query->columns; # p @headers;
my @data = $query->hashes; runtimer('query runtime');

my $xl = Spreadsheet::WriteExcel::Simple->new;
$xl->write_bold_row(\@headers);

runtimer('commencing data processing');

# process @data:
for my $ref (@data) {
    my $data = process_data($ref); # returns arrayref
	$xl->write_row($data);
}
runtimer('completed data processing');

$xl->save($filename);

sub process_data {
    my $data = shift;

    # convert _specimen to sample_type_received:
    my $specimen = $data->{_specimen}; # p $sample_type;
    $data->{sample_type_received} = convert_sample_type($specimen);

    return [ @{$data}{@headers} ];
}

sub convert_sample_type {
    my $specimen = shift;

    if ( $specimen =~ /blood/ ) { # p $specimen;
        return 'Blood';
    }
    elsif ( $specimen =~ /bone marrow/ ) { # p $specimen;
        return 'Bone marrow';
    }
    elsif ( $specimen =~ /block/ ) { # p $specimen;
        return 'Paraffin section';
    }
    elsif ( $specimen =~ /fixed/ ) { # p $specimen; # includes unfixed
        return 'Solid tumour (cancer)';
    }
    elsif ( $specimen =~ /slide/ ) { # p $specimen;
        return 'Archived sample';
    } # p $specimen;

    # default:
    return 'Others (tissues etc)';
}

sub _get_main_query {
    my $is_private = q!MAX( CASE WHEN ao.option_name = 'private' THEN 1 END )!;
    my $begin_date = $ref_date->clone->set_day(1); # first day of $ref_date month

    my %date_restriction = $cumulative # if --cumulative Getopt:
        ? ( '>=', $begin_date->ymd ) # all since ref_date
        : ( -between => [ # first & last day of ref_date month:
                $begin_date->ymd, # 1st day
                $begin_date->add(months => 1)->subtract(days => 1)->ymd, # last day
            ] );

    my @cols = (
       'CONCAT_WS("/",  r.request_number, r.year - 2000)|local_sample_id',
        q!"GeNEQ Leeds (HILIS)"|booking_laboratory!,
        q!"GeNEQ Leeds (HILIS)"|processing_laboratory!,
        'NULL|sample_type_received', # defined in convert_sample_type()
        'r.created_at|datetime_sample_received',
        'ts.time|datetime_processing_complete',
        q!IF( lt.field_label RLIKE 'DNA', 'DNA', 'RNA' )|extraction_type!,
        q!"Success"|extraction_status!, # we don't record outcomes
        # private - for data processing only:
        's.description|_specimen',
    );
    my @rels = (                                'requests|r'                   ,
        q{r.patient_case_id=pc.id}           => 'patient_case|pc'              ,
        q{pc.referral_source_id=rs.id}       => 'referral_sources|rs'          ,
        q{rs.parent_organisation_id=po.id}   => 'parent_organisations|po'      ,
        q{rsp.request_id=r.id}               => 'request_specimen|rsp'         ,
        q{rsp.specimen_id=s.id}              => 'specimens|s'                  ,
        q{ts.request_id=r.id}                => 'request_lab_test_status|ts'   ,
        q{ts.status_option_id=so.id}         => 'lab_test_status_options|so'   ,
        q{ts.lab_test_id=lt.id}              => 'lab_tests|lt'                 ,
        # left joins:
        q{=>rt.request_id=r.id}              => 'request_trial|rt'             ,
        q{=>ro.request_id=r.id}              => 'request_option|ro'            ,
        q{=>ro.option_id=ao.id}              => 'additional_options|ao'        ,
    );
    my %where = (
        -and => [ # repeated elements (eg parent_code), so requires arrayref
            'rt.request_id'        => undef,      # not in clinical trial
            'so.description'       => 'complete', # lab-test status
            'rs.organisation_code' => { -not_rlike => '^NT' },    # private hospital
            'po.parent_code'       => { -not_rlike => '^(S|Z)' }, # scotland/ireland
            'po.parent_code'       => { -not_in => \@excluded_parent_codes },
            'DATE(r.created_at)'   => \%date_restriction,
            'lt.test_name'         => { -in => \@lab_test_names },
            # to restrict to specific request id's:
            # 'r.id'               => { -in => [369247,369248] },
        ],
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
        -group_by => [ 'r.id', 'lt.id' ],
        -having   => { $is_private => undef }, # not private patient
        -order_by => 'r.id',
        #-limit => 100,
        #-offset => 100,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
         $dbix->dump_query($sql, @bind) if $query_output; # exit;
    return ($sql, @bind);
}

sub runtimer { say sprintf "$_[0]: %.2f sec", tv_interval $t0, [gettimeofday] }