RSS Git Download  Clone
Raw Blame History
#!/usr/bin/perl

=begin -------------------------------------------------------------------------
generates NYCRIS data feed for new and revised diagnoses during previous 1 month
creates pipe-delimited rows, one row per request. Email file transfer (SCP broken).

command-line: nycris.pl [duration in months - optional]

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!! for new server: need to manually ssh into server once to add key to known_hosts
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

=cut

use Getopt::Std;
getopts('m:t'); # months, testing
our($opt_m,$opt_t); # warn $opt_m; exit;

use strict;
use warnings;

my $JUST_TESTING = $opt_t || 0; # skips file transfer & file archive

use lib '/home/raj/perl5/lib/perl5';
use IO::All;
use Config::Auto;
use IPC::System::Simple qw(capture);
use Data::Printer alias => 'p';

use FindBin qw($Bin); # warn $Bin;
use lib $Bin . '/../../../lib';
use LIMS::Local::ScriptHelpers;

#############################################
my $delimiter = q{|};                       #
my $duration  = $opt_m || 1;   # months ago #
my $new_line_marker = '[NEW_LINE]';         #
#############################################

my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # p $settings;

# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();

my $sql_lib = $tools->sql_lib();
my $config  = $tools->config();
my $dbix    = $tools->dbix();

# get date last month - handles BST:
my $ref_date = $tools->date_subtract(months => $duration); # warn ref_date; exit;

my $data_filename = sprintf 'hmds_%s_%02d.txt',
    $ref_date->year, $ref_date->month; # warn $data_filename;

use constant TMP_DIR => '/tmp'; # where to create temp data file

my @data = ();

# go:
{
    # get request ID's for horizontal (1-to-1) & vertical (1-to-many) tables:
    my $request_ids = get_request_ids(); # p $request_ids;

    # get request result 1-to-1 data:
    my $query = $sql_lib->retr('nycris_data');
    my $result = $dbix->query($query, @$request_ids);

    # get data maps of vertical tables - much faster than tying into main query:
    my $data_map = get_data_map($request_ids); # p $data_map;

    while ( my $vars = $result->hash ) { # p $vars; next;
        my $request_id = $vars->{request_id};
        # combine horizontal & vertical table data:
        my $row = combine_data($vars, $data_map->{$request_id});
		push @data, $row; # warn $row;
    } # p \@data;
}

{ # create output file & push to remote server:
    my $file_path = join '/', TMP_DIR, $data_filename;

    my $contents = join "\n", @data;
    $contents > io($file_path); # warn $contents;

    # transfer file:
    unless ($JUST_TESTING) {
        transfer_file({ local_filename => $file_path });
    }
}

# combine horizontal & vertical table data & return formatted row:
sub combine_data {
    my ($vars, $supplimentary_data) = @_;

    { # add data-map fields:
        # result summaries:
        map $vars->{$_} = $supplimentary_data->{result_summary}->{$_},
            qw(immunocytochemistry flow_cytometry fish cytogenetics molecular);
        # reporter:
        map $vars->{$_} = $supplimentary_data->{reporter_data}->{$_},
            qw(reporter_name reporter_code);
        # specimen code(s) & decode(s):
        $vars->{specimen_code}
            = $supplimentary_data->{specimen}->{specimen_code};
        $vars->{specimen_decode}
            = $supplimentary_data->{specimen}->{description};
    }

    # tidy up (trim content & substitute new-line markers):
    map { $vars->{$_} = tidy($vars->{$_}) }
        grep $vars->{$_},
            qw(comment immunocytochemistry flow_cytometry fish cytogenetics
                gross_description molecular);

    $vars->{gender} ||= 'U';

    $vars->{hmds_org_code} = 'RR813';

    my @fields = qw(
        request_id
        labno
        request_date
		last_name
        first_name
        middle_name
		unit_number
		dob
		nhs_number
		gender
		referrer
		referrer_code
        location
		organisation_code
		specimen_code
		specimen_decode
        clinical_details
        gross_description
        specimen_quality
        comment
        diagnosis
        icdo3
        flow_cytometry
        immunohistochemistry
        molecular
        cytogenetics
        fish
        reporter_name
        reporter_code
		authorisation_date
        hmds_org_code
	);

    no warnings 'uninitialized'; # lots in $vars
    my $row = join $delimiter, map $vars->{$_}, @fields;
    # row needs to start with request ID - in case of unescaped new-line marker:
    die 'malformed row detected' unless $row =~ /\A\d{6,}$delimiter/;
    return $row;
}

sub get_request_ids {
    my @request_ids = ();
    { # new diagnoses:
        my $sql = $sql_lib->retr('nycris_new_diagnosis_request_ids');
        my $request_ids = $dbix->query($sql, $duration, $duration)->flat;
        push @request_ids, @$request_ids; # p $request_ids;
    }
    { # get revised diagnoses:
        my $sql = $sql_lib->retr('nycris_revised_diagnosis_request_ids');
        my $request_ids = $dbix->query($sql, $duration, $duration)->flat;
        push @request_ids, @$request_ids; # p $request_ids;
    }
    # combine new & revised diagnosis request ID's; eliminate duplicates:
    my %request_ids = map { $_ => 1 } @request_ids; # p \%request_ids;
    # return unique ID's as arrayref:
    return [ keys %request_ids ];
}

# scp function broken, replaced by email 07/2016
sub transfer_file { # only called if not $JUST_TESTING
    my $args = shift; # p $args;
    my $cfg = $settings->{nycris_server};

    my %params = (
        local_filename => $args->{local_filename},
        server_addr    => $cfg->{remote_address},
        username       => $cfg->{username},
        password       => $cfg->{password},
    );

    # scp file (returns str on failure, undef on success):
    # my $rtn = $tools->scp_file(\%params); scp function broken;
	my $rtn = email_file(); # 05/07/2016

    my $script = $tools->script_filename;

    if ($rtn) {
        $tools->mail_admin({ script => $0, msg => $rtn });
        warn "$script - $rtn\n"; # dump to logs but don't die!!
    }
    else {
        archive_and_delete_file($args->{local_filename});

        my $month = $ref_date->month_name; # for cron log:
        print "$script reports data for $month successfully uploaded\n";
    }
}

sub email_file { # email a password-protected file:
    ( my $zip_filename = $data_filename ) =~ s/txt$/zip/; # warn $zip_filename;
    ( my $subject      = $data_filename ) =~ s/\.txt$//; # warn $subject;

    my @cmd = ('/usr/bin/zip');
    push @cmd, '-q'; # quiet
	push @cmd, '-j'; # junk dir name
    push @cmd, '--password';
    push @cmd, ucfirst $subject; # using subject title as passwd
    # --------------------------------------------------------------------------
	push @cmd, '-'; # only needed to allow piping of content if using capture()
    # --------------------------------------------------------------------------
    push @cmd, TMP_DIR . '/' . $zip_filename;
    push @cmd, TMP_DIR . '/' . $data_filename; # p @cmd;

=begin # method to save zip file to disk & then use io() for $content:
	# system(@cmd);
	# my $content = io->file(TMP_DIR . '/' . $zip_filename)->all;
=cut
	# use IPC::System::Simple to cature zip output:
	my $content = capture(@cmd);
    my %mail = (
        config  => $config,
        subject => $subject,
        filename   => $zip_filename,
        attachment => $content,
    ); # p %mail;
    $tools->send_mail(\%mail, [ $settings->{ncrs_data}->{leeds}->{recipient} ]);
	return 0; # to trigger archive & delete
}

# archive data file (only called if not $JUST_TESTING):
sub archive_and_delete_file {
    my $src_file = shift; # warn $src_file;

	my $path_to_archive = $settings->{nycris_server}->{path_to_archive};

    # make sure it exists otherwise get error trying to tar non-existant file
    if (-e $src_file ) { # warn 'it exists';
        my $tar_file = sprintf '%s/%s.tar',
            $path_to_archive, $data_filename; # warn $tar_file;

		chdir TMP_DIR; # so we can use relative path for source directory

		system( sprintf 'tar -cf %s %s', $tar_file, $data_filename ); # using relative $data_file.* not full path
		system( sprintf "gzip $tar_file" ); # compress tar file
        # delete source file:
        io($src_file)->unlink;
    }
}

sub get_user_code {
    my $username = shift;

    return $settings->{national_codes}->{$username}
    || 'H9999998'; # OTHER HEALTH CARE PROFESSIONAL
}

sub get_data_map {
    my $request_ids = shift;

    my $map = {};

    { # request_specimens:
        my $sql = $sql_lib->retr('nycris_request_specimens'); # warn $sql;
        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id = $vars->{request_id};

            my %data = (
                specimen_code => $vars->{code},
                description   => $vars->{description},
            ); # warn Dumper \%data;
            $map->{$request_id}->{specimen} = \%data;
        }
    }
    { # reporter data:
        my $sql = q!
            select rh.request_id, u.username, u.first_name, u.last_name,
                ur.registration_number
            from request_history rh join users u on rh.user_id = u.id
                left join user_registration ur on ur.user_id = u.id
            where rh.action = 'reported' and request_id in (??)!;

        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id = $vars->{request_id};

            my $reporter = join ' ',
                ucfirst $vars->{first_name},
                ucfirst $vars->{last_name};

            my $reporter_code = $vars->{registration_number} || 'H9999998'; # OTHER HEALTH CARE PROFESSIONAL;

            my %data = (
                reporter_name => $reporter,
                reporter_code => $reporter_code,
            );
            $map->{$request_id}->{reporter_data} = \%data;
        } # warn Dumper $map->{reporter_data};
    }
    { # result summaries:
        my $sql = $sql_lib->retr('nycris_result_summaries');
        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id  = $vars->{request_id};
            my $section     = $vars->{section_name};
            my $result      = $vars->{results_summary};

            $section =~ s/ /_/g; # make section suitable for use as hash key
            $map->{$request_id}->{result_summary}->{lc $section} = $result;
        } # warn Dumper $map->{result_summary};
    }

    return $map;
}

sub tidy {
    my $str = shift;

    # trim:
    $str =~ s/\A\s+//; # leading
    $str =~ s/\s+\Z//; # trailing

#    $str =~ s/(\r\n)/$new_line_marker/g; # doesn't capture just \n
    $str =~ s/\r?\n/$new_line_marker/g; # captures both \r\n & \n

    return $str;
}