RSS Git Download  Clone
Raw Blame History
#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
uploads activity data to ftp server
usage: $0 [-t] [-m <months ago>], -t = test mode (skips file upload)
--------------------------------------------------------------------------------
=cut

use Getopt::Std;
getopts('m:t'); # months, testing
our($opt_m,$opt_t); # warn $opt_m; exit;

use strict;
use warnings;

#===============================================================================
my @recipients = qw(douglas raj hmds.secure);
my $duration   = $opt_m || 1; # months ago
#===============================================================================

my $JUST_TESTING = $opt_t || 0; # skips ftp, sets /tmp dir for data file, emails just raj

use lib '/home/raj/perl5/lib/perl5'; # do this before $Bin
use IO::All;
use Data::Dumper;
use Data::Printer;
use DateTime::Format::MySQL;
use IO::Compress::Gzip qw(gzip $GzipError);

use FindBin qw($Bin); # warn $Bin;
use lib "$Bin/../../../lib";
use LIMS::Local::ScriptHelpers;

my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # p $settings;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

# get tools from LIMS::Local::ScriptHelpers:
my $sql_lib = $tools->sql_lib();
my $config  = $tools->config();
my $dbix    = $tools->dbix();

my $date = $tools->date_subtract(months => $duration);
my $month = $date->month_name;

my $filename = $tools->script_filename;

my $query = $sql_lib->retr('activity_data'); # p $query;

my $delimiter = '|';

# data NOT required in activity data:
my $skip_orgs = $settings->{skip_organisation_codes}; # p $skip_orgs;
my $skip_samples = $settings->{skip_sample_types}; # p $skip_samples;

# override ST code and charge as normal sample:
my $chargable_trials = $settings->{chargable_trials}; # p $chargable_trials;

# get map of higher-charge molecular sequencing tests:
my $mol_seq_map = _get_mol_seq_map(); # p $mol_seq_map;
# get list of request ids from private hospitals:
my $private_patients_map = _get_private_patients_map(); # p $private_patients_map;
# get list of request ids with BMA/BMAT sample:
my $bone_marrow_map = _get_bone_marrow_requests_map(); # p $bone_marrow_map;
# local network locations:
my @local_network_locations = _get_local_network_locations(); # p @local_network_locations;
# skipped organisation codes => source name map:
my $organisation_name = _get_skipped_locations(); # p $organisation_name;

my @rows = ();
my %bmaseq; # to keep track of requests where new row generated for BMASEQ

my %log = (
	unsourced => [], # initialise to avoid possible undef value error later
	failed    => [],
);

my $result = $dbix->query($query, $duration, $duration); # needs duration twice

ROW:
while (my $vars = $result->hash) { # p $vars; next;
        # next if $vars->{presentation} eq 'HTS myeloid only';
    my $request_id = $vars->{id}; # p $request_id; # next unless $request_id == 363079;
    my $org_code   = $vars->{organisation_code};
    my $lab_no     = join '/', $vars->{request_number}, ($vars->{year} - 2000);

    # log & skip unknown referral_source:
    if ( grep $org_code eq $_, @$skip_orgs ) {
        my %h = (
            labno  => $lab_no,
            source => $organisation_name->{$org_code},
        );
        $h{trial} = $vars->{trial_name} if $vars->{trial_name};
        push @{ $log{unsourced} }, \%h;
        next ROW;
    }
    # skip sample types:
	next ROW if grep $vars->{sample_code} eq $_, @$skip_samples;

    # log failures (unknown clinicians for non-study requests):
    if ( $vars->{referrer_code} =~ /9999998\Z/ && $vars->{category} ne 'ST' ) {
        next ROW if $log{seen}{$request_id}++; # duplicates if request has >1 specimen
        push @{ $log{failed} }, $lab_no;
        my $location = $vars->{display_name};
        $log{unknown}{$location}++;
    }

    # non-NHS sources unless in trial/study:
	if ( $vars->{organisation_code} =~ /^NT\d{3}|8EQ15/ ) {
		$vars->{category} = '04' unless $vars->{category} eq 'ST';
	}
    # private patient in NHS hospital:
	elsif ( $private_patients_map->{$request_id} ) {
		$vars->{category} = '02'; # p $request_id;
	}
    { # get lab-tests:
        my $lab_tests = _get_lab_tests($request_id);
        $vars->{request_lab_tests} = $lab_tests;
    }

    # apply any local billing rules before upload:
    apply_local_rules($vars);

    # generate data row:
    my $data = parse_data($vars);
    push @rows, $data;
}

my $cfg = $settings->{activity_data_server};

my $local_filename = sprintf '%s/hmds.dat', $JUST_TESTING
	? '/tmp' # override path for testing
	: $cfg->{path_to_datafile};

my $content = join "\n", @rows;
$content > io($local_filename);

unless ($JUST_TESTING) {
	my %args = (
		local_filename  => $local_filename,
		remote_filename => $cfg->{remote_filename},
		server_addr     => $cfg->{remote_address},
		username        => $cfg->{username},
		password        => $cfg->{password},
	);

    # ftp file (returns str on failure, undef on success):
    my $rtn = $tools->ftp_file(\%args);

    if ($rtn) {
        $tools->mail_admin({ script => $0, msg => $rtn });
        die "$0 - $rtn\n"; # dump to logs
    }
    print "$filename reports data for $month successfully uploaded\n"; # for cron log
    # archive activity data file:
    archive_data_file();
}

my @no_referrer_src = map "$_: $log{unknown}{$_}",
    reverse sort { $log{unknown}{$a} <=> $log{unknown}{$b} }
    keys %{ $log{unknown} };

# calculate some vars for email message:
my $total_requests   = scalar @rows;
my $number_unsourced = scalar @{ $log{unsourced} };
my $number_failed    = scalar @{ $log{failed} };
my $percent_failed   = eval { 100 * $number_failed / $total_requests };
my $no_referrer_reqs = join "\n\t", @{ $log{failed} };
my $no_referrer_src  = join "\n\t", @no_referrer_src;

my $unsourced_labnos = get_unsourced_requests($log{unsourced});

# send emails to recipients:
my $message
    = sprintf qq!Activity data file created successfully (%s entries).\n\n!
    . qq!Unknown/unwanted source: %s\n\t%s\n\n!
    . qq!Unknown referrer: %s (%.2f%%)\n\t%s\n\n!
    . qq!Sources for unknown referrer:\n\t%s!,
        $total_requests,
        $number_unsourced,
        $unsourced_labnos,
        $number_failed,
        $percent_failed || 0,
        $no_referrer_reqs,
        $no_referrer_src; # print $message . "\n"; # exit;

my %mail = (
	config  => $config,
	message => $message,
	subject => 'Activity data for month ' . $month,
); # p %mail; exit;

$tools->send_mail(\%mail, \@recipients);

sub parse_data {
    my $vars = shift; # hashref

    my $line = join $delimiter, (
		'HMDS',
        'H' . $vars->{request_number} . '/' . ( $vars->{year} - 2000 ),
		uc $vars->{last_name},
		( join ' ', map ucfirst $vars->{$_}, grep $vars->{$_}, qw/first_name middle_name/ ),
		$vars->{unit_number}, # have to handle default unknown
		$vars->{dob} || '',   # already converted to dmy('/') in apply_local_rules()
		$vars->{age} || '',
		$vars->{gender} || 'U',
		$vars->{organisation_code},
		$vars->{referrer_code},
		$vars->{organisation_code} =~ /\A(RR8|RAE)/ ? $vars->{hospital_department_code} : '',
		'HM' . $vars->{sample_code},
		'HM' . $vars->{sample_code},
		$vars->{category},
		$vars->{on_call},
		DateTime::Format::MySQL->parse_datetime($vars->{created_at})->dmy('/'),
        DateTime::Format::MySQL->parse_datetime($vars->{created_at})->strftime('%d/%m/%Y:%H:%M'),
        '', # external ref - not used
		$vars->{nhs_number}	|| '',
	);

    return $line;
}

sub apply_local_rules {
    my $vars = shift; # hashref

	my $request_id = $vars->{id};
    my $lab_tests  = $vars->{request_lab_tests};

	if (
        $vars->{first_name} =~ /\A(lgi|pin)\Z/i ||
        $vars->{last_name}  =~ /\Ahtg\d+/i
    ) {
		$vars->{first_name} = uc $vars->{first_name};
	}

	# $vars->{unit_number} = '' if $vars->{unit_number} eq 'UNKNOWN';

	# downstream system can't handle d.o.b eg 1/1/1000, replace with NULL:
	if ( my $dob = $vars->{dob} ) {
		my $date = DateTime::Format::MySQL->parse_date($dob);
		if ( $date->year <= 1900 ) { # p $dob;
			$vars->{dob} = $vars->{age} = undef; # p @{$vars}{'dob','age'};
		}
		else { # to save rpt D::F::MySQL in parse_data():
			$vars->{dob} = $date->dmy('/'); # convert to activity data format
		}
	}

    # avoid uninitialized value error:
	map $vars->{$_} ||= '', qw(trial_name presentation);

    # haematology needs to be clinical not laboratory:
    $vars->{hospital_department_code} =~ s/823/303/;

    # Castleford & Normanton DH now RGX18, unless Dermatology which has its own code:
    if ( $vars->{organisation_code} eq 'RXF07' ) {
        if ( # examine both speciality & specimen:
            $vars->{hospital_department_code} == 330 ||
            $vars->{sample_description} =~ /^skin/i
        ) {
            $vars->{organisation_code} = '5N301D';
        }
        else {
            $vars->{organisation_code} = 'RGX18';
        }
    }
	# Birmingham Royal Orthopaedic Hospital - handles Leeds cases so don't charge:
    elsif ( $vars->{organisation_code} eq 'RRJ05' ) {
		$vars->{category} = 'ST';
	}

    { # branch => parent practices map:
		my $org_code = $vars->{organisation_code};
		if ( my $parent = $settings->{location_codes_map}->{$org_code} ) {
			$vars->{organisation_code} = $parent;
		}
	}

    # maybe change sample type based on presentation or test type - PB only:
	if ( $vars->{sample_code} =~ /^PB/ ) {
    	my $presentation = $vars->{presentation};
		my $set_code_map = $settings->{set_codes_map};

        # force specimen code to PBSEQ if at least one molecular sequencing test:
        if ( grep $mol_seq_map->{$_}, @$lab_tests ) { # warn '=' x 30;
            $vars->{sample_code} = 'PBSEQ'; # p $request_id;
        }
		elsif ( my $set_code = $set_code_map->{$presentation} ) { # eg PB => PBCML:
			# $vars->{sample_code} .= $set_code if $vars->{sample_code} eq 'PB';
			$vars->{sample_code} =~ s/^PB$/${set_code}/; # p $vars->{sample_code};
		}
	}
    # override 01 category for research PB samples from LTH source:
    if ( $vars->{sample_code} eq 'RXPB' && $vars->{organisation_code} =~ /^RR8/) {
        $vars->{category} = 'ST';
    }

    # presentation / screened as:
#===============================================================================
	# Outreach:
    if ( $vars->{presentation} =~ /Outreach/ ) { # billed directly now
        $vars->{category} = 'ST';
    }
    # NCG PNH (PB & BMAT):
  # TODO: change to /^PNH NCG/ after April 2019 run:
    elsif ( $vars->{presentation} =~ /^(?:NCG )?PNH(?: NCG)?$/ ) { # billed directly now
        $vars->{category} = 'ST';
    }
    #elsif ( $vars->{presentation} eq 'HTS myeloid only' ) { # until April 2017:
    #    $vars->{sample_code} = 'BMASEQ' unless
    #        grep $vars->{parent_organisation_id} eq $_, @local_network_locations;
    #}
#===============================================================================

    # HTS myeloid test (from Apr/17) needs to generate a new request for all
    # locations if HTS myeloid requested *in addition to* any other test(s)
    # except molecular srsf2 (which is also part of HTS section but never
    # requested individually) and hts_quantification & dna_extraction, unless
    # screened as HTS myeloid only (which gets BMASEQ sample only):
    if ( grep $_ eq 'hts_myeloid', @$lab_tests ) { # p $request_id;
        my @non_hts_myeloid_tests = grep { # exclude these:
            $_ !~ /^(srsf2|hts_myeloid|dna_extraction|hts_quantification)/
        } @$lab_tests; # p @other_tests;
        # make a new row if any other tests, not screened as 'HTS myeloid only'
        # and not aleady done for this request (ie has multiple samples):
        my $require_new_row = (
            ( scalar @non_hts_myeloid_tests )                   # any other tests
            && ( $vars->{presentation} ne 'HTS myeloid only' )  # not screened as
            && ( ! $bmaseq{$request_id}++ )                     # not already done
        );
        if ( $require_new_row ) { # warn 'require new row';
            # clone data and change sample type to BMASEQ:
            my $ref = LIMS::Local::Utils::clone $vars;
            $ref->{sample_code} = 'BMASEQ'; # p $ref;
          # !!! this action will skip all further local rules on this new sample:
            push @rows, parse_data($ref);
        }
        # change existing sample code to BMASEQ if we don't already have one:
        elsif (! $bmaseq{$request_id} ) { # warn 'no previous BMASEQ';
            $vars->{sample_code} = 'BMASEQ';
        }
        # ignore, will retain existing sample type:
        # else { warn 'no new row and has previous BMASEQ' }
    } # p @rows;

    # EQA samples:
    if ( $vars->{last_name} eq 'ceqas' && length $vars->{first_name} == 2 ) {
        $vars->{category} = 'ST';
    }

	# set category to ST for PB if accompanied by BMA[T] (skip if already set to ST):
	if ( $vars->{sample_code} eq 'PB' && $vars->{category} ne 'ST' ) {
		$vars->{category} = 'ST' if $bone_marrow_map->{$request_id};
	}

    # trial sample ST code overriden and billed as normal:
    if ( grep $vars->{trial_name} eq $_, @$chargable_trials ) {
        $vars->{category} = '01'; # override ST code
    }

    # FLAIR trial:
    if ( $vars->{presentation} =~ /FLAIR/ || $vars->{trial_name} =~ /CLL FLAIR/ ) {
        my @flair_followups = ( # FLMRD
            'FLAIR follow-up (I/IR/I+V/unstated)',
            'FLAIR MRD confirmation/EoT',
            'FLAIR response BM',
        );
		# set default charge to ST (all samples):
		$vars->{category} = 'ST';
        my $sample = $vars->{sample_code};

        # baseline requests:
        if ( $vars->{presentation} eq 'FLAIR baseline' ) {
            # require BSL sample code & category 01 if BMA(T) or PB without a BMA(T):
			do { $vars->{sample_code} = 'FLBSL'; $vars->{category} = '01' }
				if $sample =~ /^BMA/
					|| ( $sample =~ /^PB/ && ! $bone_marrow_map->{$request_id} );
        }
        # disease progression:
        elsif ( $vars->{presentation} eq 'FLAIR disease progression' ) {
            if ( $sample =~ /^BMA/ ) { # should never get BM for this screen
                $vars->{sample_code} = 'FLMRD'; $vars->{category} = '01';
            }
            # PB only (ie without a BMA/T - don't want FLMRD as well as FLRES:
            elsif ( $sample =~ /^PB/ && ! $bone_marrow_map->{$request_id} ) {
                $vars->{sample_code} = 'FLMRD'; $vars->{category} = '01'
            }
        }
        # other follow-ups:
        elsif ( grep $vars->{presentation} eq $_, @flair_followups ) {
            # BMA/T:
            if ( $sample =~ /^BMA/ ) { # should never get BM for these screens
                $vars->{sample_code} = 'FLRES'; $vars->{category} = '01';
            }
            # PB only (ie without a BMA/T - don't want FLMRD as well as FLRES:
            elsif ( $sample =~ /^PB/ && ! $bone_marrow_map->{$request_id} ) {
                $vars->{sample_code} = 'FLMRD'; $vars->{category} = '01'
            }
        }
        # warn Dumper [$request_id, $vars->{sample_code}, $vars->{category} ];
    }
}

sub get_unsourced_requests {
    my $data = shift; # arrayref of hashrefs (labno & trial)

    my @d = map {
        sprintf '%s %s [%s]',
            $_->{labno},
            $_->{source},
            $_->{trial} || 'WILL NOT BE BILLED';
    } @$data;
    return join "\n\t", @d;
}

sub archive_data_file {
    my $data_file = $cfg->{path_to_datafile} . '/hmds.dat';
    # make sure it exists:
    if (-e $data_file) {
        my $o = io($data_file);

        my $input  = $o->name; # p $input;
        # compress and archive:
        my $output = sprintf '%s/%s_%02d_hmds.dat.gz',
            $cfg->{path_to_datafile}, $date->year, $date->month;

        my $status = gzip $input => $output, Minimal => 1 # avoids full path info
            or warn "$0 - gzip failed: $GzipError\n";

        $o->unlink; # archived if has data, deleted anyway
    }
}

sub _get_lab_tests {
    my $request_id = shift; # p $request_id;

    my $sql = $sql_lib->retr('activity_request_lab_tests');
    my $ref = $dbix->query($sql, $request_id)->flat; # aref
    return $ref;
}

# some request.id maps:
sub _get_mol_seq_map { # only returning map of molecular_sequencing_tests now:

	my $test_names = $settings->{molecular_sequencing_tests}; # p $test_names;

	# my $sql = $sql_lib->retr('activity_molecular_sequencing'); # p $sql;
	# my $map = $dbix->query($sql, $duration, $duration, @$test_names)->map;
    my %map = map { $_ => 1 } @$test_names; # p %map;
	return \%map;
}

sub _get_bone_marrow_requests_map {
    my $sql = $sql_lib->retr('activity_bone_marrow_requests');
	my $map = $dbix->query($sql, $duration, $duration)->map;
	return $map;
}

sub _get_private_patients_map {
    my $sql = $sql_lib->retr('activity_private_patients');
	my $map = $dbix->query($sql, $duration, $duration)->map;
	return $map;
}

sub _get_skipped_locations {
    my $map = $dbix->select( 'referral_sources',
        [ 'organisation_code', 'display_name' ],
        { organisation_code => { -in => $skip_orgs } },
    )->map;
    return $map;
}

sub _get_local_network_locations {
    my $lnl = $dbix->select('local_network_locations', 'parent_id')->flat;
    return @$lnl;
}

sub _has_marrow { # replaced with _get_bone_marrow_requests()
#	my $request_id = shift;

#	$dbix->query($has_marrow, $request_id)->into(my $result); # warn Dumper [$request_id, $result];
#	return $result;
=begin
	if ($result) {
		my $data = $dbix->query(
			q!select r.request_number, s.description, d.name, rs.display_name
			from requests r join request_report_view rr on rr.request_id = r.id
			join diagnoses d on rr.diagnosis_id = d.id
			join patient_case pc on r.patient_case_id = pc.id
			join referral_sources rs on pc.referral_source_id = rs.id
			join request_initial_screen ris on ris.request_id = r.id
			join screens s on ris.screen_id = s.id
			where r.id = ?!, $request_id)->array;
		print $fh Tr( td($data));
	}
=cut
}