P3X GitList

#!/usr/bin/perl

=begin -------------------------------------------------------------------------
generates 3 files for results server and ppm:
*.001 = new authorisations
*.002 = diagnosis updates; new/revised results summary (post-authorisation)
*.003 = previous reports for changed pid details

includes:
results: RR8 cases only, with NHS/GUM number
ppm: RR8, RWY, RCD & RAE (not GUM cases)

excludes:
* screened as Chimerism samples (CHI[ABM]), Molecular miscellaneous & Community monitoring
* null NHS Number unless FName = LGI

run every day via cron - does not generate file if no reports so can run over
weekend/public hols

*** UPLOADED BY RESULTS SERVER AT 7AM - CRON NEEDS TO COMPLETE BEFOREHAND

PPM FTP server connection problems handled by non-fatal error() so results feed
can complete.
=cut

use Getopt::Std;
getopts('d:r:p:t'); # days, results_active, ppm_active, testing,
our($opt_d,$opt_r,$opt_p,$opt_t); # warn $opt_d; exit;

use strict;
use warnings;

my $JUST_TESTING = $opt_t || 0; # skips ftp, puts data files in /tmp with .PPM & .RESULTS suffixes

################################################################################
my $results_active = $opt_r // 1; # activate/deactivate results server feed
my $ppm_active     = $opt_p // 1; # activate/deactivate ppm server feed
my $delta          = $opt_d // 1; # days for date_sub calculations
################################################################################

use lib '/home/raj/perl5/lib/perl5';
use IO::All;
use Try::Tiny;
use Config::Auto;
use Data::Dumper;
use DateTime::Format::MySQL;
use IPC::System::Simple qw(run);
use Text::Wrap qw($columns fill); $columns = 68;

use FindBin qw($Bin); # warn $Bin;
use lib "$Bin/../../../lib";
use LIMS::Local::ScriptHelpers;

my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # warn Dumper $settings;

# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

my $sql_lib = $tools->sql_lib();
my $config  = $tools->config();
my $dbix    = $tools->dbix();
my $today   = $tools->time_now(); # warn $today->dmy;

# what date to get data from:
my $ref_date = $tools->date_subtract( days => $delta )->ymd; # warn $ref_date;

# global var to hold new cases request.ids for use in revisions ('NOT IN')
my $AUTHORISED_IDS; # defined in set_authorised_ids()

# options for 3 data files
# rh = request_history; rdh = request_diagnosis_history; p = patients;
# rrs = request_results_summaries
my %opts = (
    new_cases => {
        where => {
            "DATE_FORMAT(rh.`time`, '%Y-%m-%d')" => $ref_date,
        },
        file_id => 1,
    },
    revisions => {
        where => {
			'r.id' => { -not_in => get_authorised_ids() }, # will be set in new_cases
			-or => [
				"DATE_FORMAT(rdh.`time`, '%Y-%m-%d')" => $ref_date,
				"DATE_FORMAT(rrs.`time`, '%Y-%m-%d')" => $ref_date,
			],
        },
        file_id => 2,
    },
    demographics => {
        where => {
            "DATE_FORMAT(rh.`time`, '%Y-%m-%d')" => { '<' => $ref_date },
            "DATE_FORMAT(p.`updated_at`, '%Y-%m-%d')" => $ref_date,
        },
        file_id => 3,
    },
);

my $file_header = sprintf $settings->{header},
    $today->day, $today->month, $today->year, $today->hour, $today->min;
my $file_footer = join "\n", @{ $settings->{footer} };

my $data_filename = sprintf 'hmds_%s%02d%02d',
    $today->year, $today->month, $today->day;

my $query = $sql_lib->retr('results_and_ppm');

# go:
DATAFILE:
# for my $type ( sort by_fileId keys %opts ) { warn $type;
# hash order now matters - revisions MUST come AFTER new_cases:
for my $type ( qw/ new_cases revisions demographics / ) { # warn $type;
	my $vals = $opts{$type};

    # query much faster passing list of request_ids to 'WHERE req.id IN (??)':
    my $request_ids = _get_request_ids($vals->{where}); # arrayref
	# set list of new cases request.ids for use in revisions ('NOT IN'):
	set_authorised_ids($request_ids) if $type eq 'new_cases';

    next DATAFILE unless @$request_ids;

    # get data maps - much faster than tying vertical tables into main query:
    my $data_map = _get_data_map($request_ids); # warn Dumper $data_map;

    my %data = ( # reset:
        results => undef, # not strictly required, but to show data keys
        ppm     => undef,
    );

    my $result = $dbix->query($query, @$request_ids);

    ROW:
    while (my $vars = $result->hash) { # warn Dumper $vars; next;
        my $is_gum_request = ( # so ppm can skip GUM requests
            lc $vars->{first_name} eq 'lgi' && $vars->{clinical_details} eq 'HIV'
        );

        # some data fields require manipulation for template:
        _process_data($vars, $data_map); # warn Dumper $vars;

		# results server requires only LTH cases, with nhs_number or GUM id:
		if ( $vars->{organisation_code} =~ /^RR8/
                && ( $vars->{nhs_number} || $is_gum_request ) ) {
            my $dataset = $tools->process_template('cron/results.tt', $vars);
			push @{ $data{results} }, $dataset; # warn $dataset;
		}
		# ppm doesn't want GUM samples but does want null NHS numbers:
		unless ($is_gum_request) {
            my $dataset = $tools->process_template('cron/ppm.tt', $vars);
			push @{ $data{ppm} }, $dataset; # warn $dataset;
		}
    } # warn Dumper \%data;

    # PPM data:
    if ( $data{ppm} && $ppm_active ) {
        my $ppm_data = join "\n", @{ $data{ppm} };

        my $file = sprintf '%s.%03d', $data_filename, $vals->{file_id};
        my $file_path = '/tmp/' . $file;

        if ($JUST_TESTING) { $file_path .= '.PPM' } # add suffix for testing

        my $contents = join "\n", $file_header, $ppm_data, $file_footer;
        $contents > io($file_path); # warn $contents;

        # ftp ppm file:
        unless ($JUST_TESTING) {
            my %args = (
                local_filename  => $file_path,
                remote_filename => $file,
            );
            _ftp_file(\%args);
        }
    }
    # results data:
    if ( $data{results} && $results_active ) {
        my $results_data = join "\n", @{ $data{results} };

        # set tmp file if in test mode:
        my $path = $JUST_TESTING
            ? '/tmp' : $settings->{results_server}->{path_to_datafile};

        my $file = sprintf '%s/%s.%03d',
            $path, $data_filename, $vals->{file_id};

        if ($JUST_TESTING) { $file .= '.RESULTS' } # add suffix for testing

        my $contents = join "\n", $file_header, $results_data, $file_footer;
        $contents > io($file); # warn $contents;
        chmod 0664, $file; # so 'hilis' user can delete
    }
}

# archive results file:
unless ($JUST_TESTING) {
	my $path_to_datafile = $settings->{results_server}->{path_to_datafile};
	my $path_to_archive  = $settings->{results_server}->{path_to_archive};

    # make sure it exists otherwise get error trying to tar non-existent file
    if (grep -e "$path_to_datafile/$data_filename.00$_", (1..3) ) {
        my $tar_file = sprintf '%s/%s.%02d.%02d.tar',
            $path_to_archive,
            $today->year, $today->month, $today->day;

		chdir $path_to_datafile; # so we can use relative path for source directory

#		system( sprintf 'tar -cf %s %s.*', $tar_file, $data_filename );
        try { # using relative $data_file.* not full path:
            run sprintf 'tar -cf %s %s.*', $tar_file, $data_filename;
        }
        catch {
            warn $_; # $_ not $@
        };

#		system( sprintf "gzip $tar_file" ); # compress tar file
        if (-e $tar_file) {
            try {
                run "gzip $tar_file";
            }
            catch {
                warn $_; # $_ not $@
            };
        }
    }
}

# sub by_fileId { return $opts{$a}->{file_id} <=> $opts{$b}->{file_id} }

# copy values into $AUTHORISED_IDS arrayref (rather than overwriting it):
sub set_authorised_ids { @{$AUTHORISED_IDS} = @{ $_[0] } }

sub get_authorised_ids { $AUTHORISED_IDS ||= [0] } # needs an arrayref with content

# some $vars data fields require manipulation for template:
sub _process_data {
    my ($vars, $data_map) = @_;

    my $request_id = $vars->{request_id};
    my $specimen_code = $data_map->{specimen}->{$request_id}->{specimen_code};

    $vars->{request_history}  = $data_map->{history}->{$request_id};
    $vars->{specimen_details} = $data_map->{specimen}->{$request_id}->{description};
    $vars->{specimen_summary} = _specimen_summary($specimen_code);

	# re-arrange HIV's data:
	if ( $vars->{last_name}  =~ /^\d+$/ && uc $vars->{first_name} eq 'LGI' ) {
		$vars->{unit_number} = 'GU' . $vars->{unit_number};
		$vars->{first_name}  = $vars->{last_name};
	}

	{ # remove 'Unknown clinician/practitioner':
		$vars->{referrer} = '' if $vars->{referrer_code} =~ /[CG]9999998/;
	}
    if ($vars->{referrer}) { # re-format referrer field to add comma (ppm requires, results doesn't care):
        my @ary = split ' ', $vars->{referrer};
        my $inits = pop @ary; # remove 'initials' field
        $vars->{referrer} = join ', ', ( join ' ', @ary ), $inits; # warn $vars->{referrer};
    }
	{ # ensure no line > 68 chars, wrap text, replace blanks, etc:
        my $formatted_fields = $settings->{formatted_fields};
        map $vars->{$_} = _format_text($vars->{$_}), @$formatted_fields;
    }
	{ # generate composite result from lab_section results_summaries:
        my $result_summary_data = $data_map->{result_summary}->{$request_id};
            # warn Dumper $result_summary_data;
        my @results = map {
            _format_text( $_ . ': ' . $result_summary_data->{$_} );
        } keys %$result_summary_data;

        $vars->{results_summary} = ( join $settings->{new_line}, @results )
        || $settings->{indent} . 'N/A';
    }
}

# format text: indent, replace blanks with N/A, max 68 chars / line:
sub _format_text {
	my $str = shift || ''; # needs to exist or uninitialized warnings below

    # put '07|' after of new-line markers:
    my $formatted_newline = $settings->{new_line};

	$str =~ s/^(\s+|\t)//; # remove leading spaces or tabs
	$str =~ s/\|//g; # remove pipes if used!!

	$str ||= 'N/A'; # only applies to @formatted_fields, others already grepped

	my $wrapped_text = fill($settings->{indent}, $settings->{indent}, $str); # Text::Wrap function

	# new 07| in front of newly-wrapped lines:
	$wrapped_text =~ s/\n/$formatted_newline/g; # print $wrapped_text;

	return $wrapped_text;
}

sub _specimen_summary {
	local $_ = shift; # specimen string

	my @title;

	push @title, 'peripheral blood' 	if /PB|CMP/; # CMP excluded
	push @title, 'chimerism' 			if /CHI/; # currently excluding chimerism screen
	push @title, 'bone marrow aspirate' if /BMA$|BMA\,/; # BMA[^T] doesn't work
	push @title, 'bone marrow biopsy' 	if /BMAT|T[B|S][L|P]/;
	push @title, 'tissue biopsy' 		if /[DGLRX][BL|F|SL|U|A]/;
	push @title, 'effusion' 			if /EF/;
	push @title, 'CSF' 					if /CF/;
	push @title, 'slide' 				if /HS/;
	push @title, 'serum' 				if /SE/;
	push @title, 'urine' 				if /URI/;

#	my $title = @title > 1 ? join (' & ' => join (', ' => @title[ 0 .. @title - 2 ]), $title[-1]) : $title[0];
	my $title = join '/', @title; # print Dumper ($title, length $title) if length $title > 40;
	$title ||= $_; # so var not empty if specimen doesn't match in regex's

	return ( length $title < 41 )
        ? $title
        : 'multiple blood/marrow/tissue specimens';
}

sub _get_request_ids {
    my $restriction = shift;

    my $excludes  = $settings->{not_ins};   # arrayref
    my $org_codes = $settings->{org_codes}; # arrayref

    # add common restrictions:
	$restriction->{'rh.`action`'} = 'authorised'; # only allow auth'd requests
    $restriction->{'s.description' } = { 'NOT IN' => $excludes };
    $restriction->{'po.parent_code'} = { IN => $org_codes };

    my ($where, @bind) = $dbix->abstract->where($restriction);
    my $stmt = $sql_lib->retr('results_and_ppm_request_ids');

    my $sql = $stmt . $where; # warn $sql; warn Dumper \@bind;
    my $request_ids = $dbix->query($sql, @bind)->flat; # warn Dumper $request_ids;

    return $request_ids;
}

sub _get_data_map {
    my $request_ids = shift;

    my $map = {};

    { # request_specimens:
        my $sql = q!select `request_id`,
            group_concat(`description` separator '/') as 'description',
            group_concat(`sample_code`) as 'code' from `request_specimen` rs
            join `specimens` s on rs.`specimen_id` = s.`id`
            where rs.`request_id` in (??) group by `request_id`!;
        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id = $vars->{request_id};

            my %data = (
                specimen_code => $vars->{code},
                description   => $vars->{description},
            );
            $map->{specimen}->{$request_id} = \%data;
        }  # warn Dumper $map->{specimen};
    }
    { # request_history:
        my $national_codes = $settings->{national_codes};

        my $sql = q!select rh.`request_id`, rh.`action`, rh.`time`, u.`username`,
            u.`first_name`, u.`last_name` from `request_history` rh join `users`
            u on rh.`user_id` = u.`id` where rh.`action`
            in ('registered','reported','authorised') and rh.`request_id` in (??)!;
        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id = $vars->{request_id};
            my $user_id    = $vars->{username}; # warn $username;
            my $action     = $vars->{action};
            my $name       = join ' ', map ucfirst $_,
                                $vars->{first_name}, $vars->{last_name};

            my %data = (
                datetime  => DateTime::Format::MySQL->parse_datetime($vars->{time}),
                user_code => $national_codes->{$user_id},
                user_name => $name,
            );

            $map->{history}->{$request_id}->{$action} = \%data;
        } # warn Dumper $map->{history};
    }
    { # result_summaries:
        my $sql = q!select rs.`request_id`, rs.`results_summary`, ls.`section_name`
            from `request_result_summaries` rs join `lab_sections` ls on
            rs.`lab_section_id` = ls.`id` where rs.`request_id` in (??)!;
        my $result = $dbix->query($sql, @$request_ids);

        while ( my $vars = $result->hash ) { # warn Dumper $vars;
            my $request_id  = $vars->{request_id};
            my $section     = $vars->{section_name};
            my $result      = $vars->{results_summary};

            $map->{result_summary}->{$request_id}->{$section} = $result;
        } # warn Dumper $map->{result_summary};
    }

    return $map;
}

sub _ftp_file {
    my $args = shift;

    my $cfg = $settings->{ppm_server};

    my %params = (
        local_filename  => $args->{local_filename},
        remote_filename => $args->{remote_filename},
        server_addr     => $cfg->{remote_address},
        username        => $cfg->{username},
        password        => $cfg->{password},
    );

    # ftp file (returns str on failure, undef on success):
    my $rtn = $tools->ftp_file(\%params);

    if ($rtn) { # warn Dumper $rtn;
        $tools->mail_admin({ script => $0, msg => $rtn });
        warn "$0 - $rtn\n"; # dump to logs but don't die!!
    }
    else {
        io($args->{local_filename})->unlink;
    }
}

__DATA__
# new_cases:
SELECT DISTINCT(r.`id`)
FROM requests r
    JOIN `patient_case` pc on r.`patient_case_id` = pc.`id`
    JOIN `patients` p on pc.`patient_id` = p.`id`
    JOIN `referral_sources` rs on pc.`referral_source_id` = rs.`id`
    JOIN `parent_organisations` po on rs.`parent_organisation_id` = po.`id`
    JOIN `request_history` rh on rh.`request_id` = r.`id`
    JOIN `request_initial_screen` ris on ris.`request_id` = r.`id`
    JOIN `screens` s on ris.`screen_id` = s.`id`
    LEFT JOIN `request_diagnosis_history` rdh on rdh.`request_id` = r.`id`
    LEFT JOIN `request_result_summaries` rrs on rrs.`request_id` = r.`id`
WHERE DATE_FORMAT(rh.`time`, '%Y-%m-%d') = ?
 	AND po.parent_code IN ( 'RR8','RAE','RWY' ) AND rh.`action` = 'authorised'
	AND s.description NOT IN (
        'Chimerism sample',
        'Chimerism (CHIM)',
        'Chimerism (CHIA/CHIB)',
        'Molecular miscellaneous',
        'Community monitoring'
    );

# revisions:
SELECT DISTINCT(r.`id`)
FROM requests r
    JOIN `patient_case` pc on r.`patient_case_id` = pc.`id`
    JOIN `patients` p on pc.`patient_id` = p.`id`
    JOIN `referral_sources` rs on pc.`referral_source_id` = rs.`id`
    JOIN `parent_organisations` po on rs.`parent_organisation_id` = po.`id`
    JOIN `request_history` rh on rh.`request_id` = r.`id`
    JOIN `request_initial_screen` ris on ris.`request_id` = r.`id`
    JOIN `screens` s on ris.`screen_id` = s.`id`
    LEFT JOIN `request_diagnosis_history` rdh on rdh.`request_id` = r.`id`
    LEFT JOIN `request_result_summaries` rrs on rrs.`request_id` = r.`id`
WHERE
 	( DATE_FORMAT(rdh.`time`, '%Y-%m-%d') = ?
        OR DATE_FORMAT(rrs.`time`, '%Y-%m-%d') = ? )
    AND po.parent_code IN ( 'RR8','RAE','RWY' ) /* AND 1=1 */
    AND rh.`action` = 'authorised'
    AND s.description NOT IN (
        'Chimerism sample',
        'Chimerism (CHIM)',
        'Chimerism (CHIA/CHIB)',
        'Molecular miscellaneous',
        'Community monitoring'
    );

# demographic changes:
SELECT DISTINCT(r.`id`)
FROM requests r
    JOIN `patient_case` pc on r.`patient_case_id` = pc.`id`
    JOIN `patients` p on pc.`patient_id` = p.`id`
    JOIN `referral_sources` rs on pc.`referral_source_id` = rs.`id`
    JOIN `parent_organisations` po on rs.`parent_organisation_id` = po.`id`
    JOIN `request_history` rh on rh.`request_id` = r.`id`
    JOIN `request_initial_screen` ris on ris.`request_id` = r.`id`
    JOIN `screens` s on ris.`screen_id` = s.`id`
    LEFT JOIN `request_diagnosis_history` rdh on rdh.`request_id` = r.`id`
    LEFT JOIN `request_result_summaries` rrs on rrs.`request_id` = r.`id`
WHERE DATE_FORMAT(p.`updated_at`, '%Y-%m-%d') = ?
    AND DATE_FORMAT(rh.`time`, '%Y-%m-%d') < ?
    AND po.parent_code IN ( 'RR8','RAE','RWY' ) /* AND 1=1 */
    AND rh.`action` = 'authorised'
	AND s.description NOT IN (
        'Chimerism sample',
        'Chimerism (CHIM)',
        'Chimerism (CHIA/CHIB)',
        'Molecular miscellaneous',
        'Community monitoring'
    );