RSS Git Download  Clone
Raw Blame History
#!/usr/bin/perl

=begin
--------------------------------------------------------------------------------
* generates XML data file for genomics data, ftp transfer, archive as gzip
* also transfers consent forms (or any file in target dir) with ctime <= -s param
* transfers validation.log if any failures added during time window
* run from cron - set duration in arg eg "genomics_xml.pl -s 900"
* set ppm_server -> remote_addr -> localhost in settings.cfg to auto-set $JUST_TESTING
* validated using XML::Validator::Schema
	only supports 'unqualified' attributeFormDefault and elementFormDefault
	does not recognise xs:long for nhs_number - use xs:positiveInteger instead
* transferred xml files in /backups/genomics
* on ftp file transfer failure, xml/pdf files retained in /tmp
* can run script manually passing -s seconds from 1st ftp transfer failure
* manually override @request_ids to force transfer of specific file(s) from scratch

* to test: $0 -t[q] -s 1 -r <request_number> [requires but doesn't use duration (s),
    skips file tranfer, dumps xml in /tmp, doesn't update participant id]
 --------------------------------------------------------------------------------
=cut

=begin # what will transfer and why:
# set one of these:
SET @dateTime = '2017-09-18 16:15:00';
# SET @dateTime = DATE_SUB(NOW(), INTERVAL 15 MINUTE);

SELECT
	r.id as request_id, r.request_number, r.year,
	IF(r.created_at >= @dateTime, r.created_at, NULL) as request_registered,
	IF(r.updated_at >= @dateTime, r.updated_at, NULL) as request_updated,
	IF(rh.time      >= @dateTime, rh.time,      NULL) as request_history_time,
	IF(p.updated_at >= @dateTime, p.updated_at, NULL) as patient_updated,
	IF(pc.time      >= @dateTime, pc.time,      NULL) as patient_case_updated,
	IF(tr.time      >= @dateTime, tr.time,      NULL) as test_result_time,
	IF(th.time      >= @dateTime, th.time,      NULL) as file_upload_time
FROM requests AS r
    INNER JOIN patient_case AS pc ON ( r.patient_case_id = pc.id )
    INNER JOIN patients AS p ON ( pc.patient_id = p.id )
    LEFT OUTER JOIN request_history AS rh ON ( rh.action = 'screened' AND rh.request_id = r.id )
    LEFT OUTER JOIN request_lab_test_results AS tr ON ( tr.request_id = r.id )
    LEFT OUTER JOIN request_lab_test_history AS th ON ( th.action RLIKE '^uploaded' AND th.request_id = r.id )
WHERE (
	r.created_at >= @dateTime OR
	r.updated_at >= @dateTime OR
	p.updated_at >= @dateTime OR
	tr.time >= @dateTime OR
	th.time >= @dateTime OR
	rh.time >= @dateTime OR
	pc.time >= @dateTime )
GROUP BY r.id;
=cut

use Getopt::Std;
getopts('d:qr:s:t');
our(
    $opt_d, # (d)atabase - default 'genomics'
    $opt_q, # (q)uery output
    $opt_r, # (r)equest_id
    $opt_s, # (s)econds duration
    $opt_t, # (t)esting
);
$ENV{SQL_TRACE} = $opt_q;
$ENV{CENTRE} = 'genomics'; # for get_yaml_file() to find correct settings dir
# warn $opt_d; warn $opt_t; exit;

my $JUST_TESTING = $opt_t || 0; # skips file transfer (is auto-set to 1 for dev)

#===============================================================================
my $duration = $opt_s || die 'require time param (in seconds): -s <xxx>'; # warn $duration; # seconds
my $database = $opt_d || 'genomics'; # warn $database;

my $system_user = 'crontab'; # users.username of system user

my %contacts = (
	sheffield => [
        'samantha.scothern',
		'freyja.docherty',
        'gill.scott',
	],
	common => [
        'sarah.fitzgerald',
#		'denise.hancock',
        'emma.clossick',
#	    'bridget.morgan',
#		'richard.holmes',
		'raj',
	],
	leeds => [], # just use 'common' contacts
);
my $xsd = 'genomics.xsd'; # current xsd - symlinked to most recent file
my @skip_requests = (5188); # duplicate requests
#===============================================================================

use lib (
    '/home/raj/perl5/lib/perl5',
    '/home/raj/apps/HILIS4/lib',
    '/home/raj/perl-lib', # Local::XMLSimple - patched to escape single-apostrophe
);
use IO::Compress::Gzip qw(gzip $GzipError);
use LIMS::Local::GenomicsValidation;
use LIMS::Local::ScriptHelpers;
use Local::XMLSimple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here
use Data::Printer alias => 'p', use_prototypes => 0;
use Encoding::FixLatin qw/fix_latin/;
use FindBin qw($Bin); # warn $Bin; exit;
use File::Basename;
use Data::Dumper;
use Modern::Perl;
use Local::Utils;
use Path::Tiny;
use Local::DB;
use Template;
use IO::All;

my $cfg_file = $Bin . '/../lib/settings.cfg';  # p $cfg_file;
my $settings = Config::Auto::parse($cfg_file); # p $settings;
my $time_now = Local::Utils::time_now;
my $xsd_src  = $Bin . '/../../../setup/schema/xsd/' . $xsd; # p $xsd_src;

my $ppm_settings = $settings->{ppm_server};	   # p $ppm_settings;
my $genomics_cfg = $settings->{genomics};      # p $genomics_cfg;
my $genomics_uat = $genomics_cfg->{uat};       # p $genomics_uat; exit;

#===============================================================================
# override $JUST_TESTING if test script or dev box:
$JUST_TESTING ||= ( $ENV{HARNESS_ACTIVE} ||
	$ppm_settings->{remote_address} eq 'localhost' ); # warn $JUST_TESTING; exit;
#===============================================================================

# set some package vars (auto-stringify datetime objects & don't log queries):
$Local::QueryLogger::NO_QUERY_LOGS = 1; # don't need queries in logs dir
$Local::DBIx::Simple::Result::STRINGIFY_DATETIME = 1; # as we hand $data straight to XMLout()
# recent XML::Simple version (2.21) outputs uninitialized warnings on undef fields:
# no warnings "XML::Simple"; # kills hmds_test - need to delete undefs in data blocks

# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();
my $dbix  = Local::DB->dbix({ dbname => $database });
my $tt    = Template->new({ TAG_STYLE => 'asp' });

my $sql_lib = $tools->sql_lib();
my $config  = $tools->config(); # for email validation failure

my $validator = LIMS::Local::GenomicsValidation
    ->new(dbix => $dbix, cfg => $settings, xsd_src => $xsd_src);

# function to expand 30-char limit to full GeL specification:
my $result_expansion = $tools->get_yaml_file('expand_results')
|| die "cannot parse expand_results.yml"; # p $result_expansion;

# get request ids:
my @request_ids = $opt_r || do {
    my ($sql, @bind) = _request_ids(); # p [$sql, @bind];
    $dbix->query( $sql, @bind )->column; # new Local::DBIx::Simple::Result method
}; # p \@request_ids; exit;
#===============================================================================
exit unless @request_ids;
#===============================================================================

my $timestamp = LIMS::Local::Utils::time_now();

use constant TMP_DIR => '/tmp'; # where to create temp data file
use constant LOGFILE => '/home/raj/crons/genomics_transfer.log';
use constant INVALID => '/home/raj/crons/genomics_validation.log';

my @lab_sections = do {
    my @args = ( 'lab_sections', ['section_name'], { is_active => 'yes' } );
    $dbix->select(@args)->column; # array
}; # p \@lab_sections;

# only need this if using xml element names for data feed:
# my $test_element_map = get_lab_test_to_xml_element_map(); # p $test_element_map; exit;

{ # go:
    # get requests 1-to-1 data:
    my ($sql, @bind) = _demographics(\@request_ids);
    my @requests = $dbix->query($sql, @bind)->hashes;
    # p @requests;

	REQUEST:
    for my $req (@requests) { # p $req; # get results data:
        my $request_id = $req->{request_id};
        next REQUEST if grep $request_id == $_, @skip_requests;

        # allocate GeL/participant id if screened and not already done:
        if ( my $arm = $req->{arm} ) { # if screened
            my $gel_id = $genomics_uat->{rare_disease} + $request_id; # 122_000_000 + req_id
            # add increment if screened as cancer (ie to make 223_000_000 + req_id)
            $gel_id += $genomics_uat->{cancer_incr} if $arm =~ /cancer/i; # p $gel_id;

            # update unit_number -> participant_id if not already:
            unless ( _matches($req->{participant_id}, $gel_id) ) {
                $req->{participant_id} = $gel_id;
                update_participant_id($req) unless $JUST_TESTING;
            }
        }
        else { # will be invalid xml entry 'UNKNOWN' so delete:
            delete $req->{participant_id};
        }

        my %req_fields = map +($_ => $req->{$_}), # skip unrequired fields:
            grep { $_ !~ /\b(year|request_number|patient_case_id)\b/ }
                # skip undef vals to avoid uninitialized val warns from XML::Simple
                grep defined $req->{$_}, keys %$req; # p \%req_fields;

		# remove trailing 'cancer' from disease_type_registration:
		$req_fields{disease_type_registration} =~ s/\scancer\Z//i
            if $req_fields{disease_type_registration}; # only exists for cancer
        # lab number:
        $req_fields{lab_number} = join '/',
            $req->{request_number}, $req->{year} - 2000; # p \%req_fields;

        # build data structure for this request:
        my %h = ( Demographics => \%req_fields ); # p \%h;

        SECTION:
        for my $section_name ( @lab_sections ) { # p $section_name;
            # skip unless $section_name in $test_element_map keys:
            # my $section_element_map = $test_element_map->{uc($section_name)}
            #    or next SECTION; # p $section_element_map;

            my @data = do {
                my ($sql, @bind) = _results( $request_id, $section_name );
                $dbix->query( $sql, @bind )->hashes;
            }; # p \@data;

			# remove any non-word chars from section name -> CamelCase:
            my $section_element_name = join '', map ucfirst $_, grep $_ !~ /\d/,
                split '\W', $section_name; # p $section_element_name;

            RESULT: # each test/result pair for this section (query ensures no undef vals)
            for my $ref (@data) { # hashref of keys: test_name & result
                my $test_name = $ref->{test_name};
				my $result    = expand_result($ref->{result}); # 30-char limit on result
=begin # this is only needed to map test_names to GeL xml element names:
                # get XML element name or skip test (not required in XML data):
                my $test_element_name = $section_element_map->{$test_name}
                    or next RESULT; # p $element_name;
                # add element name and result to section data:
                $h{$section_element_name}{$test_element_name} = $ref->{result};
=cut
                # if NOT using GeL xml element names:
                $h{$section_element_name}{$test_name} = $result; # p $result;
            }
        }
        { # fluidx storage (1-to-many with request_id):
            my ($sql, @bind) = _storage($request_id);
            if ( my @data = $dbix->query($sql, @bind)->hashes ) {
                # ensure no undef values or XML::Simple v2.21+ issues uninitialized value warnings:
                remove_undef_values($_) for @data;
                $h{Storage}{Vials}{vial} = \@data; # p \@data;
            }
        }
        { # consent taken & withdrawn - from request_lab_test_history file:
            my ($sql, @bind) = _consent($request_id);
            my $data = $dbix->query($sql, @bind)->map_hashes('action');
			if ( my $event = $data->{consent_given} ) {
				my @user = @{$event}{ qw/first_name last_name/ };
				$h{Consent}{consent_taken_by}
					= join ' ', map ucfirst $_, @user;
			}
			if ( my $event = $data->{consent_withdrawn} ) { # infrequently used:
				my @user = @{$event}{ qw/first_name last_name/ };
				$h{ConsentWithdrawal}{withdrawal_taken_by}
					= join ' ', map ucfirst $_, @user;
			}
        } # p \%h;
        { # consent form filename (need to allow for >1 file):
            my $filepath = get_destination_sub_dir($req); # p $filepath;
            if ( -e $filepath ) {
                my @contents = io($filepath)->all;

                FILE:
                for my $f (@contents) { # p $f;
                    next FILE unless $f->type eq 'file';
					my $filename = join '~',
                        $request_id, fix_latin($f->filename);
                    # add as array in case >1 file in dir
                    push @{ $h{Consent}{Files}{file} }, $filename;

                    my $file_age = ( $timestamp->epoch - $f->ctime ); # p $file_age; # in seconds
                    next FILE if $file_age > $duration; # p [ $f->filename, $age ];

                    { # file < $duration so transfer it (returns 0 if JUST_TESTING):
                        my %args = (
                            local_filename  => $f->name,
                            remote_filename => $filename,
                        ); # p \%args;
                        ftp_file(\%args);
                    }
                }
            }
        } # p \%h;

        # validate params for patterns that cannot be done by XML validation libs
        # in validate_xml_against_xsd():
        if ( my $errs = $validator->validate_params(\%h) ) { # p $errs; # arrayref
            unless ($JUST_TESTING) {
                for my $err(@$errs) {
                    my %h = ( data => \%req_fields, error => $err );
                    log_and_notify_admin(\%h);
                }
            }
            else { # p \%req_fields;
				say $req_fields{lab_number}, ' ', $_ for @$errs;
			}
            next REQUEST unless $ENV{HARNESS_ACTIVE}; # need file to be generated
        }
        { # create and validate xml file:
            my $content = to_xml(\%h); # say $content;

			# validate xml, returns XML::SAX::Exception object on failure, undef on success:
			if ( my $error = $validator->validate_xml_against_xsd($content) ) { # warn $error;
				unless ($JUST_TESTING) {
					my %h = ( data => \%req_fields, error => $error );
                    log_and_notify_admin(\%h);
					next REQUEST;
                }
                else { # need the xml file for genomics.t or tests fail
					say $req_fields{lab_number}, ' ', $error;
				}
            }

            my $local_name = $ENV{HARNESS_ACTIVE}
				? "${database}.xml"
				: sprintf '%s_%s.xml',
					$request_id, $timestamp->strftime('%Y_%m%d_%H%M%S'); # p $local_name;
            my $full_path = join '/', TMP_DIR, $local_name; # p $full_path;

            $content > io($full_path);

            if ( $ENV{HARNESS_ACTIVE} ) { # dump %h to file and exit loop:
                io(join '/', TMP_DIR, "${database}.txt")->print(Dumper \%h);
            }
			{ # transfer file (returns 0 if JUST_TESTING):
				my %args = (
					local_filename  => $full_path,
					remote_filename => $local_name,
                    ascii_mode      => 1, # override default binary mode
				);
				ftp_file(\%args);
			}
			#===================================================================
			next REQUEST if $JUST_TESTING; # don't archive - .xml retained in /tmp
			#===================================================================
            { # archive in genomics_archive dir:
				my $archive_dir = $ppm_settings->{genomics_archive}
					or die 'no archive dir specified in settings.cfg';
				my $archive = join '/', $archive_dir, $local_name;
				my $z = new IO::Compress::Gzip($archive . '.gz') or die $!;
				$z->write($content);
				$z->close();
            }
        }
    }
    { # send validation failure log (returns if JUST_TESTING):
        my $log = io(INVALID); # get timestamp of genomics_validation.log:
        if ( $timestamp->epoch - $log->ctime < $duration ) {
            my %args = (
                local_filename  => INVALID,
                remote_filename => 'validation_failure.log',
                # remote_filename => sprintf 'validation_%s.log', $timestamp,
            ); # p \%args;
            ftp_file(\%args);
        }
    }
}

#-------------------------------------------------------------------------------
sub ftp_file {
    my $args = shift;

    my $remote_filename = $args->{remote_filename};
    my $local_filename  = $args->{local_filename}; # p $local_filename;
    my $ascii_mode      = $args->{ascii_mode} || 0; # optional - only for xml files

    my %params = (
        local_filename  => $local_filename,
        remote_filename => $remote_filename,
        server_addr     => $ppm_settings->{remote_address},
        username        => $ppm_settings->{username},
        password        => $ppm_settings->{password},
        cwd             => 'HILIS_GEL', # destination_dir for genomics data
        passive_mode    => 1,
        ascii_mode      => $ascii_mode, # override default binary mode in ScriptHelpers::ftp_file()
   ); # p \%params;

#===============================================================================
	return if $JUST_TESTING; # say "about to transfer $remote_filename";
#===============================================================================
    if ( grep $local_filename =~ /$_(_\d{4}_\d{4}_\d{6})/, @skip_requests ) { # shouldn't happen but JIC
        say "skipped ftp transfer for $local_filename";
        return;
    }
	{
	    # ftp file (returns str on failure, undef on success):
        my $rtn = $tools->ftp_file(\%params); # p $rtn;
        if ($rtn) { # p $rtn;
			my $script = File::Basename::basename($0);
			my $time   = $timestamp->strftime('%Y-%m-%d %T');
			my $msg    = sprintf q!FTP transfer error for %s, remote server !
						 . q!returned "%s"!, $remote_filename, $rtn;

            warn "$script [$time] $msg"; # dump to logs but don't die!! - no smtp on test:
            $tools->mail_admin({ script => $script, msg => $msg });

            my $logfile_msg = format_msg($rtn);
            io(LOGFILE)->append($logfile_msg);
        }
        else {
            my $msg = format_msg($local_filename);
            io(LOGFILE)->append($msg);
			# only delete xml file, not consent forms!!
		    my $tmp_dir = TMP_DIR; # p $tmp_dir; # TMP_DIR doesn't work used direct in regex
			if ( $local_filename =~ m!^$tmp_dir/! ) { # say "deleting $local_filename";
			    io($local_filename)->unlink;
			}
		}
    }
}

#-------------------------------------------------------------------------------
sub to_xml {
    my $data = shift; # arrayref of request datasets

	# remove leading/trailing white-space (eg introduced by pasting from excel):
	_trim($data);

    # ensure data conforms to XSD:
   #  _validate_for_xsd($data); # replaced with validate_xml_against_xsd() so vals get fixed

    # options for XMLout (needs array or hash):
    my %xs_opts = xs_options(); # p %xs_opts;
    my $xs = Local::XMLSimple->new(%xs_opts);

    my $ref = {
        FileCreationDateTime => $timestamp->datetime, # require string
        # add direct if NOT using xml element names, otherwise use format_request():
        # Record  => format_request($data),
        Record    => $data,
    }; # p $ref;
    # stringify datetime objects (as XML::Simple can't):
    # transform_datetime_to_string($ref); using $Local::DBIx::Simple::Result::STRINGIFY_DATETIME

    # enclose xml in outer <add> block; add 'version' inline:
    # my $input = { add => $ref, version => 2 };
   	my $xml_out = $xs->XMLout($ref); # p $xml_out;
    return $xml_out;
}

#-------------------------------------------------------------------------------
sub log_and_notify_admin {
	my $args = shift; # href of data & error

	my $data = $args->{data};

    { # log validation failure:
        chomp(my $error = $args->{error}); # p $error; # stringifies as scalar - remove new line (if present)
        my $msg = sprintf "%s %s %s\n",
            $timestamp->strftime('%Y-%m-%d %T'),
            $data->{participant_id},
            $error;
        io(INVALID)->append($msg);
    }

	my $message = _format_body_text($args); # p $message;
	my $centre  = ( grep $data->{organisation_code} =~ /^$_/, qw/RHQ RCU/ )
		? 'sheffield' : 'leeds'; # 3 Sheffield, all others Leeds (Harrogate, Bradford, etc)
	my $subject = sprintf 'Genomics [%s] xml validation failure', uc $centre;

	# send email to admin:
	my %mail = (
		config  => $config,
		subject => $subject,
		message => $message,
	); # p \%mail;

	my @recipients = ( @{ $contacts{$centre} }, @{ $contacts{common} } ); # p \@recipients;
	$tools->send_mail(\%mail, \@recipients);
}

#-------------------------------------------------------------------------------
sub xs_options {
    my %opts = (
        SuppressEmpty => 0, # ? only works for xml_in
        NoEscape      => 0, # should be default anyway, but doesn't work
        RootName      => 'HILIS4Genomics',
        XMLDecl       => q!<?xml version="1.0" encoding="UTF-8"?>!,
        KeyAttr       => [],
        NoAttr        => 1, # gives inline (scalar) AND nested (arrayref) attributes
    );
    return %opts;
}

#-------------------------------------------------------------------------------
sub get_destination_sub_dir {
    my $ref = shift; # p $ref;

    my ($request_num, $year) = ($ref->{request_number}, $ref->{year}); # p [$year, $request_num];

    my $i = int ( $request_num / 100 ); # warn $i; # 1-99 = 0, 100-199 = 1, 200-299 = 2, etc

    my $mini_dir = sprintf '%s-%s',
        100 * $i || 1, # default to 1 if 0; 1, 100, 200, 300, etc
        100 * $i + 99; # 99, 199, 299, etc

    my $app_dir  = path($tools->path_to_app_root)->realpath; # p $app_dir;
    my $rel_path = 'static/files/genomics';

    my $destination_dir = join '/', # eg /path/to/uploads/2013
        $app_dir,
        $rel_path,
        $year,
        $mini_dir,
        $request_num; # p $destination_dir;
    return $destination_dir;
}

sub remove_undef_values { # based on kaitlyn's transform_datetime_to_string() below:
    my $r = shift; # p $r;
    return unless ref $r eq 'HASH';

    for my $key( keys %$r ) { # p [$key, $r->{$key}];
        if ( ref $r->{$key} eq 'HASH' ) { # recursive call for hashrefs
            remove_undef_values( $r->{$key} );
        }
        defined $r->{$_} || delete $r->{$_} for keys %$r; # p $r;
    }
}

sub update_participant_id {
	my $req = shift; # p $req;

	my $patient_case_id = $req->{patient_case_id}; # p $patient_case_id;
	my $participant_id  = $req->{participant_id};  # p $participant_id;
	my $request_id      = $req->{request_id};

	my $result = $dbix->update('patient_case',
		{ unit_number => $participant_id }, { id => $patient_case_id });
	if ($result->rows) { # patient_case updated:
		my $user_id =
			$dbix->select('users', ['id'], { username => $system_user })->value;
		my %h = (
			request_id => $request_id,
			user_id    => $user_id,
			action 	   => 'auto-set participant ID',
		);
		$dbix->insert('request_history', \%h);
	}
    else {
        warn sprintf "participant id %s failed to update on request id %s",
            $participant_id, $request_id;
    }
}

sub expand_result { # maps abbreviated results (<30 chars) to full GeL definition:
	my $result = shift;
	return $result_expansion->{$result} || $result;
}

sub format_msg {
    my $str = shift;

    my $app_ver = get_app_version();
    my $msg = sprintf "%s [%s]: %s\n",
        $timestamp->strftime('%Y-%m-%d %T'), $app_ver, $str;
    return $msg;
}

sub get_app_version {
    chdir '/home/raj/apps/HILIS4';
	chomp( my $raw_time = `/usr/bin/git log -1 --format=%cd --date=raw` ); # warn $raw_time; # cmd appends new-line
	# extract epoch seconds eg 1455628027 +0000 [GMT], 1460022637 +0100 [BST], etc:
	my ($epoch) = $raw_time =~ /^(\d+)\s\+0[01]00/ or # don't use '||' here - gets truth
		die "could'nt extract epoch time from `git log` output"; # warn $epoch;
	my $vnumber = 999 + `/usr/bin/git rev-list HEAD --count`; # as 1st in git repo = svn #1000

    return sprintf '%.4f', 4 + ( $vnumber / 10000 ); # ensure 4 digits
}

sub _format_body_text {
	my $args = shift; # hashref or data & error

	chomp(my $error = $args->{error}); # p $error; # stringifies as scalar - remove new line
	my $data = $args->{data};  # p $data; # href

	# expand $error if it matches 'element <foo>'
	if ( $error =~ m!element <.*>! ) { # replace <foo> with label & section
		$error = $validator->reformat_error($error);
    }

	my $msg; # generate message:
	my $tmpl = q!Lab number <% lab_number %>! .
		q!<% IF participant_id %> [<% participant_id %>]<% END %>! .
		q!: <% err %> Record is invalid and has not been uploaded.!;
    $tt->process(\$tmpl, { %$data, err => $error }, \$msg) or die $tt->error();

	return $msg;
}

sub _matches { Local::Utils::matches(@_) }

sub _trim {
	my $data = shift; # p $data;
    KEY:
	for my $key ( keys %$data ) { #	say "$key: $data->{$key}"; # p $data->{$key};
        # next unless $data->{$key}; # only needed if allowing NULL values
        if ( ref($data->{$key}) eq 'HASH' ) { # recursive call for hashrefs
			# warn $data->{$key};
            _trim( $data->{$key} ); # p $r->{$key};
			next KEY; # not sure why this is necessary ??
        } # say "$key: $data->{$key}";
		# remove leading/trailing space chars (incl. unicode 160) from string:
		$data->{$key} =~ s/^\p{Space}//g;
		$data->{$key} =~ s/\p{Space}$//g; # p $data->{$key};
	} # p $data;
	return 0; # no need - modifying hashref in situ
}

# SQLAM ------------------------------------------------------------------------
=begin
sub _get_query_for_request_ids { # replaced with SQLA::More method
    my $sql = $SQL{request_ids};
    # count number of placeholders in $sql:
    my $n = () = $sql =~ /\?/g; # p $n;
    # bind one $duration per placeholder:
    my @bind = map $duration, (1..$n); # p \@bind;
    return ($sql, @bind);
}
=cut
# SQLA::More method - works but far too slow (20sec vs 2sec) - due to position
# of 'th.action' much slower when part of 'where' rather than 'join' - fixed using
# operator & condition in join defs (http://scsys.co.uk:8007/559045):
sub _request_ids {
    my $interval = $time_now->subtract( seconds => $duration ); # p $interval;
    my @cols = ( 'DISTINCT(r.id)' );
    my @rels = (
        'requests|r'         =>  q{r.patient_case_id=pc.id}					   ,
        'patient_case|pc'    =>  q{pc.patient_id=p.id}     					   ,
        'patients|p'       	 =>	 q(=>{rh.request_id=r.id,rh.action='screened'}), # no spaces !!
		'request_history|rh' =>  q{=>tr.request_id=r.id}					   ,
        'request_lab_test_results|tr' => { # need to use operator/condition due to 'like':
                operator  => '=>', # options are <=>, <=, =>, ==
                condition => {
                    'th.request_id' => {
                        '=' => { -ident => 'r.id' }, # ident prevents addition to @bind
                    },
                    'th.action' => {
                        'like' => 'uploaded %',
                    },
                },
            },
        'request_lab_test_history|th',
    );
    my %where = (
        -or => [
            'r.created_at' => { '>=' => $interval },
            'r.updated_at' => { '>=' => $interval },
            'p.updated_at' => { '>=' => $interval },
            'tr.time'      => { '>=' => $interval },
#            -and => [ # th.action & rh.action moved back into joins:
#                'th.action' => { like => 'uploaded %' }, # THIS IS VERY SLOW HERE
#                'th.time'   => { '>=' => $interval },
#            ],
#            -and => [
#                'rh.action' => 'screened',
#                'rh.time'   => { '>=' => $interval },
#            ],
            'th.time'      => { '>=' => $interval },
            'rh.time'      => { '>=' => $interval },
            'pc.time'      => { '>=' => $interval },
        ],
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    ); # p @args;
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind); exit;
    return ($sql, @bind);
}
#=cut

sub _demographics {
    my $request_ids = shift;
    my @cols = (
        qw/
            r.id|request_id
            r.request_number
            r.year
            r.created_at|registered
            p.last_name
            p.first_name
            p.nhs_number
            p.gender
            p.dob
            pc.id|patient_case_id
            pc.unit_number|participant_id
            rs.display_name|referral_source
            rs.organisation_code
        /,
        # returns RD or Cancer (incl HaemOnc) or NULL:
        q!CASE WHEN s.description = 'Rare disease' THEN s.description
            WHEN sc.name IS NOT NULL THEN 'Cancer' END AS arm!,
        # disease_type_registration not required for Rare disease:
        q!CASE s.description WHEN 'Rare disease' THEN NULL
            ELSE s.description END AS disease_type_registration!,
        qw/
            r2.name|referrer_name
            r2.national_code|referrer_code
            rh.time|screened
        /,
    );
    no warnings 'qw'; # comma in rh join
    my @rels = qw(
        requests|r                   r.patient_case_id=pc.id
        patient_case|pc              pc.patient_id=p.id
        patients|p                   pc.referral_source_id=rs.id
        referral_sources|rs          r.referrer_department_id=rd.id
        referrer_department|rd       rd.referrer_id=r2.id
        referrers|r2                 =>ris.request_id=r.id
        request_initial_screen|ris   =>ris.screen_id=s.id
        screens|s                    =>s.category_id=sc.id
        screen_category|sc           =>rh.request_id=r.id,rh.action='screened'
        request_history|rh
    );
    my %where = ( 'r.id' => { -in => $request_ids } );

    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
        -order_by => 'r.id',
    );
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind);
    return ($sql, @bind);
}

sub _results {
    my ($request_id, $section_name) = @_;
    my @cols = qw( lt.test_name  tr.result );
    my @rels = qw(
        request_lab_test_results|tr     tr.lab_test_id=lt.id
        lab_tests|lt                    lt.lab_section_id=ls.id
        lab_sections|ls
    );
    my %where = (
        'tr.request_id'   => $request_id,
        'ls.section_name' => $section_name,
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    );
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind);
    return ($sql, @bind);
}

sub _storage {
    my $request_id = shift;
    my @cols = qw(
        rs.vialId
        rs.sample
        rs.part_number
        rs.source
        sr.plateId|rack_id
        rs.vial_location
    );
    my @rels = qw( request_storage|rs   =>rs.rack_id=sr.id   storage_racks|sr );
    my %where = ( 'rs.request_id' => $request_id );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    );
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind);
    return ($sql, @bind);
}

sub _consent {
    my $request_id = shift;
    my @cols = (
        'u.first_name',
        'u.last_name',
        q!CASE WHEN t1.action RLIKE 'Consent given' THEN 'consent_given'
            ELSE 'consent_withdrawn' END AS 'action'!,
    );
    my @rels = qw( request_lab_test_history|t1    t1.user_id=u.id    users|u );
    my $str = 'auto-set (Consent given|Withdrawal date) status to complete';
    my %where = (
        't1.request_id' => $request_id,
        't1.action'     => { -rlike => $str },
    );
    my @args = (
		-columns  => \@cols,
		-from     => [ -join => @rels ],
		-where    => \%where,
    );
    my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind;
        # $dbix->dump_query($sql, @bind);
    return ($sql, @bind);
}

=begin # unused methods:

# map of lab-section => test name => XML element name
sub get_lab_test_to_xml_element_map { # only needed if using xml element names
    my @data = <DATA>; # p @data;

    my %map;
    for (@data) {
        chomp; # say $_;
        my ($section, $test_name, $element_name) = split ':', $_;
            # p [$section, $test_name, $element_name];
        next unless $section;
        $map{$section}{$test_name} = $element_name;
    } # p \%map;
    return \%map;
}

# formats repeating request blocks (only needed if using xml element names):
sub format_request {
    my $r = shift; # p $r;

    my %data;
    { # registration/demographics:
        my $ref = $r->{Demographics};
        my %h = (
            nhs_number    => $ref->{nhs_number},
            date_of_birth => $ref->{dob},
            last_name     => $ref->{last_name},
            first_name    => $ref->{first_name},
            gender        => $ref->{gender},
        );
        $data{Registration} = \%h;
    } # p \%data;
    return \%data;
}

# for use if date(time) value is an object:
sub transform_datetime_to_string {
    my $r = shift;
    return unless ref $r eq 'HASH';

    for my $key ( keys %$r ) {
        if ( ref $r->{$key} eq 'HASH' ) { # recursive call for hashrefs
            transform_datetime_to_string( $r->{$key} );
        }
        if ( ref $r->{$key} =~ /^DateTime/ ) {
            $r->{$key} = sprintf "%s", $r->{$key}; # quote to force to string
        }
    }
}
=cut

__DATA__
CONSENT:consent_date:date-of-consent
CONSENT:consent_form_version:name-and-version-of-consent-form
CONSENT:consent_taken:consent-given
CONSENT:info_sheet_version:name-and-version-of-participant-information-sheet
CONSENT:consent_q1:consent-question-1
CONSENT:consent_q2:consent-question-2
CONSENT:consent_q3:consent-question-3
CONSENT:consent_q4:consent-question-4
CONSENT:consent_q5:consent-question-5
CONSENT:consent_q6:consent-question-6
CONSENT:consent_q7:consent-question-7
CONSENT:consent_q8:consent-question-8
CONSENT:consent_q9:consent-question-9
CONSENT:consent_q10:consent-question-10
CONSENT:consent_q11:consent-question-11
CONSENT:consent_q12:consent-question-12
CONSENT:consent_q13:consent-question-13
CONSENT:consent_q14:consent-question-14

SPECIMENS:edta1:DNA Blood Germline
SPECIMENS:pst:LiHep Plasma
SPECIMENS:paxgene_rna:RNA Blood
SPECIMENS:sst:Serum
SPECIMENS:handling_protocol:Laboratory Method

STORAGE:vial_id:Laboratory Sample ID
STORAGE:rack_id:GMC Rack ID
STORAGE:rack_location:GMC Rack Well

DNA:edta1_qc_date:Test Result DateTime
DNA:edta1_qc_type:Test Result Type
DNA:edta1_qc_result:Test Result Value
DNA:edta1_fluidx:Laboratory Sample ID
DNA:edta1_fluidx_vol:Laboratory Sample Volume
DNA:edta1_fluidx_rack_id:GMC Rack ID
DNA:edta1_fluidx_rack_well:GMC Rack Well
DNA:edta1_fluidx_vol:Laboratory Remaining Volume Banked

DISPATCH:consignment_number:GMC Sample Consignment Number
DISPATCH:dna_dispatched:GMC Sample Dispatch Date
DISPATCH:omics_dispatched:GMC Sample Dispatch Date