RSS Git Download  Clone
Raw Blame History
#!/usr/bin/perl

=begin
--------------------------------------------------------------------------------
generates XML data file for genomics data, ftp transfer
also transfers consent forms (or any file in request dir) with ctime within MAX_AGE
run from cron hourly, or change MAX_AGE
run as root/www-data or directory read permissions denied (on test server only ?)
--------------------------------------------------------------------------------
=cut

##############################################################
use constant MAX_AGE => 3600; # NEEDS TO BE SAME VALUE AS CRON
my $duration = $ARGV[0] || MAX_AGE; # say $duration; # seconds
my $JUST_TESTING = 1; # skips file transfer
##############################################################

use lib (
    '/home/raj/perl5/lib/perl5',
    '/home/raj/apps/HILIS4/lib',
    '/home/raj/perl-lib', # Local::XMLSimple - patched to escape single-apostrophe
);
use LIMS::Local::ScriptHelpers;
use Local::XMLSimple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here
use Data::Printer alias => 'p', use_prototypes => 0;
use FindBin qw($Bin); # warn $Bin; exit;
use Modern::Perl;
use Path::Tiny;
use IO::All;

# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();

my $sql_lib = $tools->sql_lib();
my $dbix    = $tools->dbix();
# switch to 'genomics' db:
$dbix->dbh->do('use `genomics`');

my %SQL = (
    demographics => $sql_lib->retr('genomics_demographics'),
    request_ids  => $sql_lib->retr('genomics_requests'),
    storage      => $sql_lib->retr('genomics_storage'),
    results      => $sql_lib->retr('genomics_results'),
);

# get request ids:
my @request_ids = do {
    my $sql = $SQL{request_ids};
    # count number of placeholders in $sql:
    my $n = () = $sql =~ /\?/g; # p $n;
    # bind one $duration per placeholder:
    my @bind = map $duration, (1..$n); # p \@bind;
    $dbix->query( $sql, @bind )->flat; # array 
}; # p \@request_ids; exit;
#===============================================================================
exit unless @request_ids;
#===============================================================================

my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # p $settings;

my $timestamp = LIMS::Local::Utils::time_now();

use constant TMP_DIR => '/tmp'; # where to create temp data file

my @lab_sections = do {
    my @args = ( 'lab_sections', ['section_name'], { is_active => 'yes'} );
    $dbix->select(@args)->flat; # array
}; # p \@lab_sections;

# only need this if using xml element names for data feed:
# my $test_element_map = get_lab_test_to_xml_element_map(); # p $test_element_map; exit;

{ # go:
    # get requests 1-to-1 data:
    my @requests = $dbix->query( $SQL{demographics}, @request_ids )->hashes;
    # p @requests;

    for my $req (@requests) { # p $req; # get results data:
        my $request_id = $req->{request_id};

        # allocate GeL/participant id if screened:
        if ( my $arm = $req->{arm} ) {
            my $gel_id = ( lc $arm eq 'rare disease' )
                ? $request_id + 223000000
                : $request_id + 122000000;
            $req->{participant_id} = $gel_id;
        } # p $req;

        my %req_fields = map +($_ => $req->{$_}), # don't need request_num:
            grep $_ !~ /request_number/, keys %$req; # p \@req_fields;
        # build data structure for this request:
        my %h = ( Demographics => \%req_fields ); # p \%h;

        SECTION:
        for my $section_name (@lab_sections) { # p $section_name;
            # skip unless $section_name in $test_element_map keys:
            # my $section_element_map = $test_element_map->{uc($section_name)}
            #    or next SECTION; # p $section_element_map;

            my $data = do {
                my @bind = ( $request_id, $section_name );
                $dbix->query( $SQL{results}, @bind )->hashes;
            }; # p $data;

            RESULT: # each test/result pair for this section
            for my $ref (@$data) { # hashref of keys: test_name & result
                my $test_name = $ref->{test_name};
=begin # this is only needed to map test_names to GeL xml element names:
                # get XML element name or skip test (not required in XML data):
                my $element_name = $section_element_map->{$test_name}
                    or next RESULT; # p $element_name;
                # add element name and result to section data:
                $h{$section_name}{$element_name} = $ref->{result};
=cut
                # if NOT using GeL xml element names:
                $h{$section_name}{$test_name} = $ref->{result};
            }
        }
        # fluidx storage (1-to-many with request_id):
        if ( my @data = $dbix->query( $SQL{storage}, $request_id )->hashes ) {
            $h{Storage}{Vials}{vial} = \@data; # p \@data;
        }
        { # consent form filename (need to allow for >1 file):
            my $filepath = get_destination_sub_dir($req); # p $filepath;
            if ( -e $filepath ) {
                my @contents = io($filepath)->all;

                FILE:
                for my $f (@contents) { # p $f;
                    next FILE unless $f->type eq 'file';
                    # add as array in case >1 file in dir
                    push @{ $h{Consent}{Files}{file} }, $f->filename;

                    my $file_age = ( $timestamp->epoch - $f->ctime ); # p $file_age; # in seconds
                    next if $file_age > MAX_AGE; # p [ $f->filename, $age ];

                    { # transfer file (returns 0 if JUST_TESTING):
                        my %args = (
                            local_filename  => $f->name,
                            remote_filename => $f->filename,
                        ); # p \%args;
                        ftp_file(\%args);
                    }
                }
            }
        }
        { # create xml file & send:
            my $local_name = sprintf '%s_%s.xml',
                $request_id, $timestamp->strftime('%Y_%m%d_%H%M%S'); # p $local_name;
            my $full_path = join '/', TMP_DIR, $local_name; # p $full_path;
        
            my $content = to_xml(\%h); # say $content;
            $content > io($full_path);
            
            { # transfer file (returns 0 if JUST_TESTING):
                my %args = (
                    local_filename  => $full_path,
                    remote_filename => $local_name,
                );
                ftp_file(\%args);
            }
        }
    }
} 

#-------------------------------------------------------------------------------
sub ftp_file {
    my $args = shift;

    my $cfg = $settings->{ppm_server};

    # modify destination_dir for genomics data:
    my $remote_filename = join '/', 'HILIS_GEL', $args->{remote_filename};
    my $local_filename  = $args->{local_filename}; # p $local_filename;

    my %params = (
        local_filename  => $local_filename,
        remote_filename => $remote_filename,
        server_addr     => $cfg->{remote_address},
        username        => $cfg->{username},
        password        => $cfg->{password},
    ); # p \%params;

#===============================================================================
    unless ($JUST_TESTING) {
        # ftp file (returns str on failure, undef on success):
        my $rtn = $tools->ftp_file(\%params); # p $rtn;
    
        if ($rtn) { # p $rtn;
            $tools->mail_admin({ script => $0, msg => $rtn });
            warn "$0 - $rtn\n"; # dump to logs but don't die!!
        }
    }
#===============================================================================

    { # only delete xml file, not consent forms!!
        my $tmp_dir = TMP_DIR; # p $tmp_dir; # TMP_DIR doesn't work used direct in regex
        if ( $local_filename =~ m!^$tmp_dir/! ) { # say "deleting $local_filename";
            # io($local_filename)->unlink; # switched off for testing
        }
    }
}

#-------------------------------------------------------------------------------
sub to_xml {
    my $data = shift; # arrayref of request datasets

    # options for XMLout (needs array or hash):
    my %xs_opts = xs_options(); # p %xs_opts;
    my $xs = Local::XMLSimple->new(%xs_opts);

    my $ref = {
        FileCreationDateTime => $timestamp->strftime('%F %T'),
        # add direct if NOT using xml element names, otherwise use format_request():
        # Record  => format_request($data),
        Record    => $data,
    };

    # enclose xml in outer <add> block; add 'version' inline:
    # my $input = { add => $ref, version => 2 };
   	my $xml_out = $xs->XMLout($ref); # p $xml_out;
    return $xml_out;
}

#-------------------------------------------------------------------------------
sub xs_options {
    my %opts = (
        SuppressEmpty => 0, # ? only works for xml_in
        NoEscape      => 0, # should be default anyway, but doesn't work
        RootName      => 'HILIS4Genomics',
        XMLDecl       => q!<?xml version="1.0" encoding="UTF-8"?>!,
        KeyAttr       => [],
        NoAttr        => 1, # gives inline (scalar) AND nested (arrayref) attributes
    );
    return %opts;
}

#-------------------------------------------------------------------------------
# formats repeating request blocks (only needed if using xml element names):
sub format_request {
    my $r = shift; # p $r;

    my %data;
    { # registration/demographics:
        my $ref = $r->{Demographics};
        my %h = (
            nhs_number    => $ref->{nhs_number},
            date_of_birth => $ref->{dob},
            last_name     => $ref->{last_name},
            first_name    => $ref->{first_name},
            gender        => $ref->{gender},
        );
        $data{Registration} = \%h;
    } # p \%data;
    return \%data;
}

#-------------------------------------------------------------------------------
sub get_lab_test_to_xml_element_map { # only needed if using xml element names
    my @data = <DATA>; # p @data;

    my %map;
    for (@data) {
        chomp; # say $_;
        my ($section, $test_name, $element_name) = split ':', $_;
            # p [$section, $test_name, $element_name];
        next unless $section;
        $map{$section}{$test_name} = $element_name;
    } # p \%map;
    return \%map;
}

#-------------------------------------------------------------------------------
sub get_destination_sub_dir {
    my $ref = shift; # p $ref;

    my ($year)      = $ref->{registered} =~ m!^(\d{4})!; # not using requests.year
    my $request_num = $ref->{request_number}; # p [$year, $request_num];

    my $i = int ( $request_num / 100 ); # warn $i; # 1-99 = 0, 100-199 = 1, 200-299 = 2, etc

    my $mini_dir = sprintf '%s-%s',
        100 * $i || 1, # default to 1 if 0; 1, 100, 200, 300, etc
        100 * $i + 99; # 99, 199, 299, etc

    my $app_dir  = path($tools->path_to_app_root)->realpath; # p $app_dir;
    my $rel_path = 'static/files/genomics';

    my $destination_dir = join '/', # eg /path/to/uploads/2013
        $app_dir,
        $rel_path,
        $year,
        $mini_dir,
        $request_num; # p $destination_dir;
    return $destination_dir;
}

# map of lab-section => test name => XML element name
__DATA__
CONSENT:consent_date:date-of-consent
CONSENT:consent_form_version:name-and-version-of-consent-form
CONSENT:consent_taken:consent-given
CONSENT:info_sheet_version:name-and-version-of-participant-information-sheet
CONSENT:consent_q1:consent-question-1
CONSENT:consent_q2:consent-question-2
CONSENT:consent_q3:consent-question-3
CONSENT:consent_q4:consent-question-4
CONSENT:consent_q5:consent-question-5
CONSENT:consent_q6:consent-question-6
CONSENT:consent_q7:consent-question-7
CONSENT:consent_q8:consent-question-8
CONSENT:consent_q9:consent-question-9
CONSENT:consent_q10:consent-question-10
CONSENT:consent_q11:consent-question-11
CONSENT:consent_q12:consent-question-12
CONSENT:consent_q13:consent-question-13
CONSENT:consent_q14:consent-question-14

SPECIMENS:edta1:DNA Blood Germline
SPECIMENS:pst:LiHep Plasma
SPECIMENS:paxgene_rna:RNA Blood
SPECIMENS:sst:Serum
SPECIMENS:handling_protocol:Laboratory Method

STORAGE:vial_id:Laboratory Sample ID
STORAGE:rack_id:GMC Rack ID
STORAGE:rack_location:GMC Rack Well

DNA:edta1_qc_date:Test Result DateTime
DNA:edta1_qc_type:Test Result Type
DNA:edta1_qc_result:Test Result Value
DNA:edta1_fluidx:Laboratory Sample ID
DNA:edta1_fluidx_vol:Laboratory Sample Volume
DNA:edta1_fluidx_rack_id:GMC Rack ID
DNA:edta1_fluidx_rack_well:GMC Rack Well
DNA:edta1_fluidx_vol:Laboratory Remaining Volume Banked

DISPATCH:consignment_number:GMC Sample Consignment Number
DISPATCH:dna_dispatched:GMC Sample Dispatch Date
DISPATCH:omics_dispatched:GMC Sample Dispatch Date