#!/usr/bin/perl
=begin
--------------------------------------------------------------------------------
generates XML data file for genomics data, run from cron hourly
--------------------------------------------------------------------------------
=cut
###########################################################
my $duration = $ARGV[0] || 1; # hrs ago
my $JUST_TESTING = 1; # skips file transfer & file archive
###########################################################
use lib (
'/home/raj/perl5/lib/perl5',
'/home/raj/apps/HILIS4/lib',
'/home/raj/perl-lib', # Local::XMLSimple - patched to escape single-apostrophe
);
use LIMS::Local::ScriptHelpers;
use Local::XMLSimple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here
use Data::Printer alias => 'p', use_prototypes => 0;
use Modern::Perl;
use IO::All;
# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();
my $sql_lib = $tools->sql_lib();
my $dbix = $tools->dbix();
# switch to 'genomics' db:
$dbix->dbh->do('use `genomics`');
# get request ids:
my $request_ids = do {
my $query = $sql_lib->retr('genomics_requests'); # p $query;
my @bind = map $duration, (1..3); # p \@bind;
$dbix->query($query, @bind)->flat;
}; # p $request_ids; exit;
$request_ids = [9];
#-------------------------------------------------------------------------------
exit unless @$request_ids;
#-------------------------------------------------------------------------------
my $timestamp = $tools->time_now->datetime;
my $data_filename = sprintf 'hilis4_genomics_data_%s.xml', $timestamp; # p $data_filename;
# $data_filename = 'hmds_test_cosd.xml';
use constant TMP_DIR => '/tmp'; # where to create temp data file
my $lab_sections
= $dbix->select('lab_sections', 'section_name', { is_active => 'yes'} )->flat;
# p $sections;
# only need this if using xml element names for data feed:
my $test_element_map = get_lab_test_to_xml_element_map(); # p $test_element_map; exit;
my @data = ();
{ # go:
# get requests 1-to-1 data:
my @requests = do {
# from local.sql:
my $query = $sql_lib->retr('genomics_demographics'); # p $query;
$dbix->query($query, @$request_ids)->hashes;
}; # p @requests;
{ # get results data:
my $sql = $sql_lib->retr('genomics_results'); # p $sql;
for my $req (@requests) { # p $req; next;
# build data structure for this request:
my %h = ( Demographics => $req );
my $request_id = $req->{request_id};
SECTION:
for my $section_name (@$lab_sections) { # p $section_name;
# skip unless $section_name in $test_element_map keys:
my $section_element_map = $test_element_map->{uc($section_name)}
or next SECTION; # p $section_element_map;
my $data = do {
my @bind = ( $request_id, $section_name );
$dbix->query( $sql, @bind )->hashes;
}; # p $data;
RESULT: # each test/result pair for this section
for my $ref (@$data) { # hashref of keys: test_name & result
my $test_name = $ref->{test_name};
=begin # this is only needed to map test_names to GeL xml element names:
# get XML element name or skip test (not required in XML data):
my $element_name = $section_element_map->{$test_name}
or next RESULT; # p $element_name;
# add element name and result to section data:
$h{$section_name}{$element_name} = $ref->{result};
=cut
# if NOT using Gel xml element names:
$h{$section_name}{$test_name} = $ref->{result};
}
}
push @data, \%h;
}
}
} # p @data;
{ # create output file & push to remote server:
my $file_path = join '/', TMP_DIR, $data_filename;
my $content = to_xml(\@data); say $content;
$content > io($file_path);
# transfer file:
unless ($JUST_TESTING) {
my %args = ( local_filename => $file_path );
transfer_file(\%args);
}
}
#-------------------------------------------------------------------------------
sub to_xml {
my $data = shift; # arrayref of request datasets
my $count = @$data; # p $count;
# options for XMLout (needs array or hash):
my %xs_opts = xs_options(); # p %xs_opts;
my $xs = Local::XMLSimple->new(%xs_opts);
my $ref = {
Record => [], # will be populated in for @data block
RecordCount => $count,
FileCreationDateTime => $timestamp,
};
# sent to format_request() if using xml element names, otherwise use as-is:
# push @{ $ref->{Record} }, format_request($_) for @$data; # p $ref;
$ref->{Record} = $data; # p $ref;
# enclose xml in outer <add> block; add ulisa 'version' inline:
# my $input = { add => $vars, version => 2 };
my $xml_out = $xs->XMLout($ref); # p $xml_out;
return $xml_out;
}
#-------------------------------------------------------------------------------
sub xs_options {
my %opts = (
SuppressEmpty => 0, # ? only works for xml_in
NoEscape => 0, # should be default anyway, but doesn't work
RootName => 'HILIS4Genomics',
XMLDecl => q!<?xml version="1.0" encoding="UTF-8"?>!,
KeyAttr => [],
NoAttr => 1, # gives inline (scalar) AND nested (arrayref) attributes
);
return %opts;
}
#-------------------------------------------------------------------------------
# formats repeating request blocks (only needed if using xml element names):
sub format_request {
my $r = shift; # p $r;
my %data;
{ # registration/demographics:
my $ref = $r->{Demographics};
my %h = (
nhs_number => $ref->{nhs_number},
date_of_birth => $ref->{dob},
last_name => $ref->{last_name},
first_name => $ref->{first_name},
gender => $ref->{gender},
);
$data{Registration} = \%h;
} # p \%data;
return \%data;
}
#-------------------------------------------------------------------------------
sub get_lab_test_to_xml_element_map {
my @data = <DATA>; # p @data;
my %map;
for (@data) {
chomp; # say $_;
my ($section, $test_name, $element_name) = split ':', $_;
# p [$section, $test_name, $element_name];
next unless $section;
$map{$section}{$test_name} = $element_name;
} # p \%map;
return \%map;
}
# map of lab-section => test name => XML element name
__DATA__
CONSENT:consent_date:date-of-consent
CONSENT:consent_form_version:name-and-version-of-consent-form
CONSENT:consent_taken:consent-given
CONSENT:info_sheet_version:name-and-version-of-participant-information-sheet
CONSENT:consent_q1:consent-question-1
CONSENT:consent_q2:consent-question-2
CONSENT:consent_q3:consent-question-3
CONSENT:consent_q4:consent-question-4
CONSENT:consent_q5:consent-question-5
CONSENT:consent_q6:consent-question-6
CONSENT:consent_q7:consent-question-7
CONSENT:consent_q8:consent-question-8
CONSENT:consent_q9:consent-question-9
CONSENT:consent_q10:consent-question-10
CONSENT:consent_q11:consent-question-11
CONSENT:consent_q12:consent-question-12
CONSENT:consent_q13:consent-question-13
CONSENT:consent_q14:consent-question-14
SPECIMENS:edta1:DNA Blood Germline
SPECIMENS:pst:LiHep Plasma
SPECIMENS:paxgene_rna:RNA Blood
SPECIMENS:sst:Serum
SPECIMENS:handling_protocol:Laboratory Method
STORAGE:vial_id:Laboratory Sample ID
STORAGE:rack_id:GMC Rack ID
STORAGE:rack_location:GMC Rack Well
DNA:edta1_qc_date:Test Result DateTime
DNA:edta1_qc_type:Test Result Type
DNA:edta1_qc_result:Test Result Value
DNA:edta1_fluidx:Laboratory Sample ID
DNA:edta1_fluidx_vol:Laboratory Sample Volume
DNA:edta1_fluidx_rack_id:GMC Rack ID
DNA:edta1_fluidx_rack_well:GMC Rack Well
DNA:edta1_fluidx_vol:Laboratory Remaining Volume Banked
DISPATCH:consignment_number:GMC Sample Consignment Number
DISPATCH:dna_dispatched:GMC Sample Dispatch Date
DISPATCH:omics_dispatched:GMC Sample Dispatch Date