RSS Git Download  Clone
Raw Blame History
#!/usr/bin/perl

=begin -------------------------------------------------------------------------
generates NYCRIS data feed for new and revised diagnoses during previous 1 month
creates pipe-delimited rows, one row per request. SCP file transfer.

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!! for new server: need to manually ssh into server once to add key to known_hosts
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

=cut ---------------------------------------------------------------------------

use strict;
use warnings;

my $JUST_TESTING = 0; # skips file transfer & file archive

use lib '/home/raj/perl5/lib/perl5';
use IO::All;
use Config::Auto;
use Data::Dumper;

use FindBin qw($Bin); # warn $Bin;
use lib $Bin . '/../../../lib';
use LIMS::Local::ScriptHelpers;

#####################################
my $delimiter = q{|}; # pipe        # 
my $duration  = 1; # months ago     #
my $new_line_marker = '[NEW_LINE]'; #
#####################################

my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # warn Dumper $settings;
    
# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();

my $sql_lib = $tools->sql_lib();
my $config  = $tools->config(); 
my $dbix    = $tools->dbix();

# get date last month - handles BST:
my $ref_date = $tools->date_subtract(months => $duration); # warn ref_date; exit;

my $data_filename = sprintf 'hmds_%s_%02d.txt',
    $ref_date->year, $ref_date->month; # warn $data_filename;

use constant TMP_DIR => '/tmp'; # where to create temp data file

my @data = ();

# go:
{
    # get request ID's for horizontal (1-to-1) & vertical (1-to-many) tables:
    my $request_ids = get_request_ids(); # warn Dumper $request_ids;
    
    # get request result 1-to-1 data:
    my $query = $sql_lib->retr('nycris_data');
    my $result = $dbix->query($query, @$request_ids);
    
    # get data maps of vertical tables - much faster than tying into main query:
    my $data_map = get_data_map($request_ids); # warn Dumper $data_map;

    while ( my $vars = $result->hash ) { # warn Dumper $vars; next;
        my $request_id = $vars->{request_id};
        # combine horizontal & vertical table data:
        my $row = combine_data($vars, $data_map->{$request_id});
		push @data, $row; # warn $row;
    } # warn Dumper \@data;
}

{ # create output file & push to remote server:
    my $file_path = join '/', TMP_DIR, $data_filename;

    my $contents = join "\n", @data; 
    $contents > io($file_path); # warn $contents;

    # transfer file:
    unless ($JUST_TESTING) {
        my %args = ( local_filename => $file_path );
        transfer_file(\%args);
    }
}

# combine horizontal & vertical table data & return formatted row:
sub combine_data {
    my ($vars, $supplimentary_data) = @_;
    
    { # add data-map fields:
        # result summaries:
        map $vars->{$_} = $supplimentary_data->{result_summary}->{$_},
            qw(immunocytochemistry flow_cytometry fish cytogenetics molecular);
        # reporter:
        map $vars->{$_} = $supplimentary_data->{reporter_data}->{$_},
            qw(reporter_name reporter_code);
        # specimen code(s) & decode(s):
        $vars->{specimen_code}
            = $supplimentary_data->{specimen}->{specimen_code};
        $vars->{specimen_decode}
            = $supplimentary_data->{specimen}->{description};
    }
    
    # tidy up (trim content & substitute new-line markers):
    map { $vars->{$_} = tidy($vars->{$_}) }
        grep $vars->{$_},
            qw(comment immunocytochemistry flow_cytometry fish cytogenetics
                gross_description molecular);
    
    $vars->{gender} ||= 'U';
    
    $vars->{hmds_org_code} = 'RR813';
    
    my @fields = qw(
        request_id
        labno
        request_date
		last_name
        first_name
        middle_name
		unit_number
		dob
		nhs_number
		gender
		referrer
		referrer_code
        location
		organisation_code
		specimen_code
		specimen_decode
        clinical_details
        gross_description
        specimen_quality
        comment
        diagnosis
        icdo3
        flow_cytometry
        immunohistochemistry
        molecular
        cytogenetics
        fish
        reporter_name
        reporter_code
		authorisation_date
        hmds_org_code
	);
    
    no warnings 'uninitialized'; # lots in $vars
    my $row = join $delimiter, map $vars->{$_}, @fields;
    # row needs to start with request ID - in case of unescaped new-line marker:
    die 'malformed row detected' unless $row =~ /\A\d{6,}$delimiter/; 
    return $row;
}

sub get_request_ids {
    my @request_ids = ();
    { # new diagnoses:
        my $sql = $sql_lib->retr('nycris_new_diagnosis_request_ids');
        my $request_ids = $dbix->query($sql, $duration, $duration)->flat;
        push @request_ids, @$request_ids; # warn Dumper $request_ids;
    }    
    { # get revised diagnoses:
        my $sql = $sql_lib->retr('nycris_revised_diagnosis_request_ids');
        my $request_ids = $dbix->query($sql, $duration, $duration)->flat;
        push @request_ids, @$request_ids; # warn Dumper $request_ids;
    }
    # combine new & revised diagnosis request ID's; eliminate duplicates:
    my %request_ids = map { $_ => 1 } @request_ids; # warn Dumper \%request_ids;
    # return unique ID's as arrayref:
    return [ keys %request_ids ];    
}

sub transfer_file { # only called if not $JUST_TESTING
    my $args = shift;
    
    my $cfg = $settings->{nycris_server};
    
    my %params = (
        local_filename => $args->{local_filename},
        server_addr    => $cfg->{remote_address},
        username       => $cfg->{username},
        password       => $cfg->{password},
    );

    # scp file (returns str on failure, undef on success):
    my $rtn = $tools->scp_file(\%params);

    my $script = $tools->script_filename;

    if ($rtn) {
        $tools->mail_admin({ script => $0, msg => $rtn });
        warn "$script - $rtn\n"; # dump to logs but don't die!!
    }
    else {
        archive_and_delete_file($args->{local_filename});
        
        my $month = $ref_date->month_name; # for cron log:
        print "$script reports data for $month succesfully uploaded\n";
    }
}

# archive data file (only called if not $JUST_TESTING):
sub archive_and_delete_file {
    my $src_file = shift; # warn $src_file;

	my $path_to_archive = $settings->{nycris_server}->{path_to_archive};

    # make sure it exists otherwise get error trying to tar non-existant file
    if (-e $src_file ) { # warn 'it exists';
        my $tar_file = sprintf '%s/%s.tar',
            $path_to_archive, $data_filename; # warn $tar_file;
	
		chdir TMP_DIR; # so we can use relative path for source directory

		system( sprintf 'tar -cf %s %s', $tar_file, $data_filename ); # using relative $data_file.* not full path
		system( sprintf "gzip $tar_file" ); # compress tar file
        # delete source file:
        io($src_file)->unlink;
    }
}

sub get_user_code {
    my $username = shift;
    
    return $settings->{national_codes}->{$username}
    || 'H9999998'; # OTHER HEALTH CARE PROFESSIONAL
}

sub get_data_map {
    my $request_ids = shift;
    
    my $map = {};
    
    { # request_specimens:
        my $sql = $sql_lib->retr('nycris_request_specimens'); # warn $sql;
        my $result = $dbix->query($sql, @$request_ids);
        
        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id = $vars->{request_id};
            
            my %data = (
                specimen_code => $vars->{code},
                description   => $vars->{description},
            ); # warn Dumper \%data;
            $map->{$request_id}->{specimen} = \%data;
        }  
    }
    { # reporter data:
        my $sql = q!select rh.request_id, u.username, u.first_name, u.last_name
            from request_history rh join users u on ( rh.user_id = u.id and
            action = 'reported' ) where request_id in (??)!;
            
        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id = $vars->{request_id};
            
            my $reporter = join ' ',
                ucfirst $vars->{first_name},
                ucfirst $vars->{last_name};

            my $reporter_code = get_user_code($vars->{username});
            
            my %data = (
                reporter_name => $reporter,
                reporter_code => $reporter_code,
            );
            $map->{$request_id}->{reporter_data} = \%data;
        } # warn Dumper $map->{reporter_data};
    }
    { # result summaries:
        my $sql = $sql_lib->retr('nycris_result_summaries');
        my $result = $dbix->query($sql, @$request_ids);
        
        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id  = $vars->{request_id};
            my $section     = $vars->{section_name};
            my $result      = $vars->{results_summary};              
           
            $section =~ s/ /_/g; # make section suitable for use as hash key 
            $map->{$request_id}->{result_summary}->{lc $section} = $result;
        } # warn Dumper $map->{result_summary}; 
    }
    
    return $map;
}

sub tidy {
    my $str = shift;
    
    # trim:
    $str =~ s/\A\s+//; # leading
    $str =~ s/\s+\Z//; # trailing
    
#    $str =~ s/(\r\n)/$new_line_marker/g; # doesn't capture just \n
    $str =~ s/\r?\n/$new_line_marker/g; # captures both \r\n & \n

    return $str;
}