#!/usr/bin/perl

=begin comment

--------------------------------------------------------------------------------
CHANGE GeL/PARTICIPANT ID ALLOCATION AFTER DRY-RUN

generates XML data file for genomics data, ftp transfer
also transfers consent forms (or any file in request dir) with ctime within MAX_AGE
run from cron hourly, or change MAX_AGE
run as root/www-data or directory read permissions denied (on test server only ?)
--------------------------------------------------------------------------------

=end comment

=cut

##############################################################
use constant MAX_AGE => 3600; # NEEDS TO BE SAME VALUE AS CRON
my $duration = $ARGV[0] || MAX_AGE; # say $duration; # seconds

my $JUST_TESTING = 1; # skips file transfer
##############################################################

use lib (
    '/home/raj/perl5/lib/perl5',
    '/home/raj/apps/HILIS4/lib',
    '/home/raj/perl-lib', # Local::XMLSimple - patched to escape single-apostrophe
);

use LIMS::Local::ScriptHelpers;
use Local::XMLSimple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here
use Data::Printer alias => 'p', use_prototypes => 0;
use FindBin qw($Bin); # warn $Bin; exit;
use Modern::Perl;
use Path::Tiny;
use IO::All;

# NOTE(review): Config::Auto::parse() and LIMS::Local::Utils::time_now() are
# called fully-qualified below without a matching 'use' in this file -
# presumably loaded via LIMS::Local::ScriptHelpers; confirm.

# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();

my $sql_lib = $tools->sql_lib();
my $dbix    = $tools->dbix();

# switch to 'genomics' db:
$dbix->dbh->do('use `genomics`');

my %SQL = (
    demographics => $sql_lib->retr('genomics_demographics'),
    request_ids  => $sql_lib->retr('genomics_requests'),
    storage      => $sql_lib->retr('genomics_storage'),
    results      => $sql_lib->retr('genomics_results'),
);

# get request ids:
my @request_ids = do {
    my $sql = $SQL{request_ids};
    # count number of placeholders in $sql:
    my $n = () = $sql =~ /\?/g; # p $n;
    # bind one $duration per placeholder:
    my @bind = map $duration, (1..$n); # p \@bind;
    $dbix->query( $sql, @bind )->flat; # array
}; # p \@request_ids; exit;

#===============================================================================
exit unless @request_ids;
#===============================================================================

my $timestamp = LIMS::Local::Utils::time_now();
my $cfg_file  = "$Bin/../lib/settings.cfg";
my $settings  = Config::Auto::parse($cfg_file); # p $settings;

use constant TMP_DIR => '/tmp'; # where to create temp data file
use constant LOGFILE => '/home/raj/crons/genomics_transfer.log';

# use constant CANCER_ID       => 223000000;
# use constant RARE_DISEASE_ID => 122000000;
use constant CANCER_ID       => 200230000; # dry-run
use constant RARE_DISEASE_ID => 100230000; # dry-run

my @lab_sections = do {
    my @args = ( 'lab_sections', ['section_name'], { is_active => 'yes'} );
    $dbix->select(@args)->flat; # array
}; # p \@lab_sections;

# only need this if using xml element names for data feed:
# my $test_element_map = get_lab_test_to_xml_element_map(); # p $test_element_map; exit;

{ # go:
    # get requests 1-to-1 data:
    my @requests = $dbix->query( $SQL{demographics}, @request_ids )->hashes; # p @requests;

    for my $req (@requests) { # p $req;
        # get results data:
        my $request_id = $req->{request_id};

        # allocate GeL/participant id if screened:
        if ( my $arm = $req->{arm} ) {
            # case-insensitive match; the former 'lc $arm =~ /cancer/' parsed
            # as lc( $arm =~ /cancer/ ) so the lower-casing never applied to
            # $arm and e.g. 'Cancer' was given the rare-disease offset:
            my $gel_id = ( $arm =~ /cancer/i )
                ? $request_id + CANCER_ID
                : $request_id + RARE_DISEASE_ID;
            $req->{participant_id} = $gel_id;
        } # p $req;

        my %req_fields = map +($_ => $req->{$_}), # don't need request_num:
            grep $_ !~ /request_number/, keys %$req; # p \%req_fields;

        # build data structure for this request:
        my %h = ( Demographics => \%req_fields ); # p \%h;

        SECTION:
        for my $section_name (@lab_sections) { # p $section_name;
            # skip unless $section_name in $test_element_map keys:
            # my $section_element_map = $test_element_map->{uc($section_name)}
            #   or next SECTION; # p $section_element_map;

            # ->hashes is called in scalar context here, so $data is
            # dereferenced as an arrayref below:
            my $data = do {
                my @bind = ( $request_id, $section_name );
                $dbix->query( $SQL{results}, @bind )->hashes;
            }; # p $data;

            RESULT: # each test/result pair for this section
            for my $ref (@$data) { # hashref of keys: test_name & result
                my $test_name = $ref->{test_name};

=begin comment

# this is only needed to map test_names to GeL xml element names:
                # get XML element name or skip test (not required in XML data):
                my $element_name = $section_element_map->{$test_name}
                    or next RESULT; # p $element_name;
                # add element name and result to section data:
                $h{$section_name}{$element_name} = $ref->{result};

=end comment

=cut

                # if NOT using GeL xml element names:
                $h{$section_name}{$test_name} = $ref->{result};
            }
        }

        # fluidx storage (1-to-many with request_id):
        if ( my @data = $dbix->query( $SQL{storage}, $request_id )->hashes ) {
            $h{Storage}{Vials}{vial} = \@data; # p \@data;
        }

        { # consent form filename (need to allow for >1 file):
            my $filepath = get_destination_sub_dir($req); # p $filepath;
            if ( -e $filepath ) {
                my @contents = io($filepath)->all;
                FILE:
                for my $f (@contents) { # p $f;
                    next FILE unless $f->type eq 'file';
                    # add as array in case >1 file in dir
                    push @{ $h{Consent}{Files}{file} }, $f->filename;

                    my $file_age = ( $timestamp->epoch - $f->ctime ); # p $file_age; # in seconds
                    # NOTE(review): compares against MAX_AGE, not the
                    # command-line $duration used for the request query -
                    # confirm this is intended:
                    next FILE if $file_age > MAX_AGE; # p [ $f->filename, $file_age ];

                    { # transfer file (returns 0 if JUST_TESTING):
                        my %args = (
                            local_filename  => $f->name,
                            remote_filename => $f->filename,
                        ); # p \%args;
                        ftp_file(\%args);
                    }
                }
            }
        }

        { # create xml file & send:
            my $local_name = sprintf '%s_%s.xml', $request_id,
                $timestamp->strftime('%Y_%m%d_%H%M%S'); # p $local_name;
            my $full_path = join '/', TMP_DIR, $local_name; # p $full_path;

            my $content = to_xml(\%h); # say $content;
            $content > io($full_path); # IO::All overloaded '>': write content to file

            { # transfer file (returns 0 if JUST_TESTING):
                my %args = (
                    local_filename  => $full_path,
                    remote_filename => $local_name,
                );
                ftp_file(\%args);
            }
        }
    }
}

#-------------------------------------------------------------------------------
# Transfers one file to the server configured under 'ppm_server' in settings,
# placing it in the remote HILIS_GEL directory. Skipped when $JUST_TESTING.
# Failures are mailed to admin, warned and appended to LOGFILE; successes are
# appended to LOGFILE.
# Args: hashref with keys local_filename (path of file to send) and
#       remote_filename (name to give the file on the remote side).
sub ftp_file {
    my $args = shift;

    my $cfg = $settings->{ppm_server};

    # modify destination_dir for genomics data:
    my $remote_filename = join '/', 'HILIS_GEL', $args->{remote_filename};
    my $local_filename  = $args->{local_filename}; # p $local_filename;

    my %params = (
        local_filename  => $local_filename,
        remote_filename => $remote_filename,
        server_addr     => $cfg->{remote_address},
        username        => $cfg->{username},
        password        => $cfg->{password},
    ); # p \%params;
#===============================================================================
    unless ($JUST_TESTING) { # say "about to transfer $remote_filename";
        # ftp file (returns str on failure, undef on success):
        my $rtn = $tools->ftp_file(\%params); # p $rtn;
        if ($rtn) { # p $rtn;
            $tools->mail_admin({ script => $0, msg => $rtn });
            warn "$0 - $rtn\n";
            # dump to logs but don't die!! - no smtp on test:
            io(LOGFILE)->append($rtn . "\n");
        }
        else {
            my $msg = $timestamp . ': ' . $local_filename . "\n";
            io(LOGFILE)->append($msg);
        }
    }
#===============================================================================
    { # only delete xml file, not consent forms!!
        my $tmp_dir = TMP_DIR; # p $tmp_dir; # TMP_DIR doesn't work used direct in regex
        if ( $local_filename =~ m!^\Q$tmp_dir\E/! ) { # say "deleting $local_filename";
            # io($local_filename)->unlink; # switched off for testing
        }
    }
}

#-------------------------------------------------------------------------------
# Serialises one request's data to an XML string using Local::XMLSimple.
# Args:    hashref of request data (Demographics, lab sections, Storage, Consent).
# Returns: XML string with a FileCreationDateTime element and the data under Record.
sub to_xml {
    my $data = shift; # hashref of data for a single request

    # options for XMLout (needs array or hash):
    my %xs_opts = xs_options(); # p %xs_opts;
    my $xs = Local::XMLSimple->new(%xs_opts);

    my $ref = {
        FileCreationDateTime => $timestamp->strftime('%F %T'),
        # add direct if NOT using xml element names, otherwise use format_request():
        # Record => format_request($data),
        Record => $data,
    };

    # enclose xml in outer block; add 'version' inline:
    # my $input = { add => $ref, version => 2 };
    my $xml_out = $xs->XMLout($ref); # p $xml_out;
    return $xml_out;
}

#-------------------------------------------------------------------------------
# Returns (as a list of key/value pairs) the options used to construct the
# Local::XMLSimple object.
sub xs_options {
    my %opts = (
        SuppressEmpty => 0, # ? only works for xml_in
        NoEscape      => 0, # should be default anyway, but doesn't work
        RootName      => 'HILIS4Genomics',
        # NOTE(review): empty declaration string - an '<?xml ...?>' literal
        # may have been lost from this source; confirm intended value:
        XMLDecl       => q!!,
        KeyAttr       => [],
        NoAttr        => 1, # gives inline (scalar) AND nested (arrayref) attributes
    );
    return %opts;
}

#-------------------------------------------------------------------------------
# formats repeating request blocks (only needed if using xml element names):
# Args:    hashref containing a 'Demographics' hashref.
# Returns: hashref with a 'Registration' entry built from the demographics.
sub format_request {
    my $r = shift; # p $r;

    my %data;

    { # registration/demographics:
        my $ref = $r->{Demographics};

        my %h = (
            nhs_number    => $ref->{nhs_number},
            date_of_birth => $ref->{dob},
            last_name     => $ref->{last_name},
            first_name    => $ref->{first_name},
            gender        => $ref->{gender},
        );
        $data{Registration} = \%h;
    } # p \%data;

    return \%data;
}

#-------------------------------------------------------------------------------
# Builds the lab-section => test name => XML element name map from the
# colon-separated records in the __DATA__ section.
# Returns: hashref of hashrefs ($map->{SECTION}{test_name} = element name).
sub get_lab_test_to_xml_element_map { # only needed if using xml element names
    my %map;

    LINE:
    while ( my $line = <DATA> ) {
        chomp $line; # say $line;
        my ($section, $test_name, $element_name) = split ':', $line;
            # p [$section, $test_name, $element_name];
        next LINE unless $section; # skip blank lines
        $map{$section}{$test_name} = $element_name;
    } # p \%map;

    return \%map;
}

#-------------------------------------------------------------------------------
# Returns the directory path expected to hold uploaded files for a request:
#   <app_root>/static/files/genomics/<year>/<range>/<request_number>
# where <year> is the first 4 digits of 'registered' and <range> is the
# hundred-block containing the request number (1-99, 100-199, 200-299, etc).
# Args: hashref with keys 'registered' and 'request_number'.
sub get_destination_sub_dir {
    my $ref = shift; # p $ref;

    my ($year) = $ref->{registered} =~ m!^(\d{4})!; # not using requests.year
    my $request_num = $ref->{request_number}; # p [$year, $request_num];

    my $i = int ( $request_num / 100 ); # warn $i;
        # 1-99 = 0, 100-199 = 1, 200-299 = 2, etc

    my $mini_dir = sprintf '%s-%s',
        100 * $i || 1, # default to 1 if 0; 1, 100, 200, 300, etc
        100 * $i + 99; # 99, 199, 299, etc

    my $app_dir  = path($tools->path_to_app_root)->realpath; # p $app_dir;
    my $rel_path = 'static/files/genomics';

    my $destination_dir = join '/', # eg /path/to/uploads/2013
        $app_dir, $rel_path, $year, $mini_dir, $request_num;
        # p $destination_dir;
    return $destination_dir;
}

# map of lab-section => test name => XML element name
__DATA__
CONSENT:consent_date:date-of-consent
CONSENT:consent_form_version:name-and-version-of-consent-form
CONSENT:consent_taken:consent-given
CONSENT:info_sheet_version:name-and-version-of-participant-information-sheet
CONSENT:consent_q1:consent-question-1
CONSENT:consent_q2:consent-question-2
CONSENT:consent_q3:consent-question-3
CONSENT:consent_q4:consent-question-4
CONSENT:consent_q5:consent-question-5
CONSENT:consent_q6:consent-question-6
CONSENT:consent_q7:consent-question-7
CONSENT:consent_q8:consent-question-8
CONSENT:consent_q9:consent-question-9
CONSENT:consent_q10:consent-question-10
CONSENT:consent_q11:consent-question-11
CONSENT:consent_q12:consent-question-12
CONSENT:consent_q13:consent-question-13
CONSENT:consent_q14:consent-question-14
SPECIMENS:edta1:DNA Blood Germline
SPECIMENS:pst:LiHep Plasma
SPECIMENS:paxgene_rna:RNA Blood
SPECIMENS:sst:Serum
SPECIMENS:handling_protocol:Laboratory Method
STORAGE:vial_id:Laboratory Sample ID
STORAGE:rack_id:GMC Rack ID
STORAGE:rack_location:GMC Rack Well
DNA:edta1_qc_date:Test Result DateTime
DNA:edta1_qc_type:Test Result Type
DNA:edta1_qc_result:Test Result Value
DNA:edta1_fluidx:Laboratory Sample ID
DNA:edta1_fluidx_vol:Laboratory Sample Volume
DNA:edta1_fluidx_rack_id:GMC Rack ID
DNA:edta1_fluidx_rack_well:GMC Rack Well
DNA:edta1_fluidx_vol:Laboratory Remaining Volume Banked
DISPATCH:consignment_number:GMC Sample Consignment Number
DISPATCH:dna_dispatched:GMC Sample Dispatch Date
DISPATCH:omics_dispatched:GMC Sample Dispatch Date