#!/usr/bin/perl
=begin
--------------------------------------------------------------------------------
generates NCRS XML data feed for new and revised diagnoses [or all icdo3 '/3' reports
authorised or revised - change nycris to ncrs in libray.sql] during previous 1 month
SCP file transfer
manual command-line:
ncrs_cosd.pl [duration in months - optional (defaults to 1)]
scp /tmp/<filename> hmds@163.160.247.17:<filename>
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
!! for new server: need to manually ssh into server once to add key to known_hosts
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
online reference:
ICDO3 topographical codes: http://codes.iarc.fr/topography
ICDO3 morphological codes: http://codes.iarc.fr/codegroup/2
--------------------------------------------------------------------------------
=cut
my $JUST_TESTING = 1; # skips file transfer & file archive
use lib '/home/raj/perl5/lib/perl5';
use XML::Simple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here
use Data::Printer;
use Config::Auto;
use Data::Dumper;
use Modern::Perl;
use DateTime; # for last_day_of_month()
use IO::All;
use FindBin qw($Bin); # warn $Bin;
use lib $Bin . '/../../../lib';
use LIMS::Local::ScriptHelpers;
#############################################
my $delimiter = q{|}; #
my $duration = $ARGV[0] || 1; # months ago #
my $new_line_marker = '[NEW_LINE]'; #
#############################################
my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # p $settings;
# get tools from LIMS::Local::ScriptHelpers:
my $tools = LIMS::Local::ScriptHelpers->new();
my $sql_lib = $tools->sql_lib();
my $config = $tools->config();
my $dbix = $tools->dbix();
# dates ========================================================================
# get date last month - handles BST:
my $ref_datetime = $tools->date_subtract(months => $duration);
my $timestamp = $tools->time_now->datetime;
my $start_date = $ref_datetime->strftime('%Y-%m-01');
my $end_date = DateTime->last_day_of_month(
year => $ref_datetime->year,
month => $ref_datetime->month,
)->ymd; # p $start_date; p $end_date;
# ==============================================================================
my $data_filename = sprintf 'hmds_%s_%02d.txt',
$ref_datetime->year, $ref_datetime->month; # p $data_filename;
# $data_filename = 'hmds_test_cosd.xml';
use constant HMDS_ORG_CODE => 'RR813'; # OrgCodePathReport & OrgCodeSubmitter
use constant TMP_DIR => '/tmp'; # where to create temp data file
# decode for biopsy_site info:
my $anatomical_site_map =
$dbix->select('anatomical_sites', [qw(site_name snomed)])->map;
# p $anatomical_site_map;
# for non-histology specimens (sample_code NOT RLIKE '[DGLRX]([BS]L|F|U)'):
# NB these are ICDO3 topography codes for site of tumour origin
my %specimen_topography = (
BMA => 'C42.1',
BMAT => 'C42.1',
CF => '',
CHIA => 'C42.0', # should never issue new diagnosis on CHIA
CHIB => 'C42.0', # should never issue new diagnosis on CHIB
CHIM => 'C42.0', # should never issue new diagnosis on CHIM
CMP => '', # should never issue new diagnosis on CMP
EF => '',
# HS => '', # require biopsy_site now
# LA => 'C77.9', # require biopsy_site now
PB => 'C42.0',
SE => '', # never issue new diagnosis on SE
TBL => 'C42.1',
TBP => 'C42.1',
TSL => 'C42.1',
URI => '', # never issue new diagnosis on URI
# XA => '', # require biopsy_site now
RXPB => 'C42.0', # should never issue new diagnosis on RXPB
TSM => '', # never issue new diagnosis on TSM
);
my @data = ();
{ # go:
# get request ID's for horizontal (1-to-1) & vertical (1-to-many) tables:
my $request_ids = get_request_ids(); # p $request_ids;
# get request result 1-to-1 data:
my $query = $sql_lib->retr('ncrs_data'); # p $query;
my $result = $dbix->query($query, @$request_ids);
# get data maps of vertical tables - much faster than tying into main query:
my $data_map = get_data_map($request_ids); # p $data_map;
while ( my $vars = $result->hash ) { # p $vars; next;
my $request_id = $vars->{request_id};
# merge vertical table data with $vars:
combine_data($vars, $data_map->{$request_id}); p $vars;
push @data, $vars;
} # p @data;
}
{ # create output file & push to remote server:
my $file_path = join '/', TMP_DIR, $data_filename;
my $content = to_xml(\@data); # p $content;
$content > io($file_path);
# transfer file:
unless ($JUST_TESTING) {
my %args = ( local_filename => $file_path );
transfer_file(\%args);
}
}
#-------------------------------------------------------------------------------
sub to_xml {
my $data = shift; # arrayref of request datasets
my $count = @$data;
# options for XMLout (needs array or hash):
my %xs_opts = xs_options(); # p %xs_opts;
my $xs = XML::Simple->new(%xs_opts);
my $ref = {
Id => {
root => '18DA78B8-44A4-4AA7-95B0-87C198E6B0AE', # where does this come from ?
},
OrgCodeSubmitter => {
extension => HMDS_ORG_CODE,
},
RecordCount => {
value => $count,
},
ReportingPeriodStartDate => $start_date,
ReportingPeriodEndDate => $end_date,
FileCreationDateTime => $timestamp,
};
push @{ $ref->{COSDRecord} }, format_cosd_unit($_) for @$data; # p $ref;
# enclose xml in outer <add> block; add ulisa 'version' inline:
# my $input = { add => $vars, version => 2 };
my $xml_out = $xs->XMLout($ref); # p $xml_out;
return $xml_out;
}
#-------------------------------------------------------------------------------
# formats repeating COSDRecord blocks:
sub format_cosd_unit {
my $v = shift;
my %COSDRecord = (
Id => {
root => 'B63AFD92-568E-4845-A0CE-2643FE15DE08', # where does this come from ?
},
);
my %HAEM;
{ # COSDRecord/Haematology/HaematologyCore:
my %h = (
HaematologyCoreLinkagePatientId => {
NHSNumber => {
extension => $v->{nhs_number},
},
NHSNumberStatusIndicator => {
code => '02', # Number present but not traced
},
LocalPatientId => $v->{unit_number},
Birthdate => $v->{dob},
OrgCodeOfProvider => {
extension => $v->{organisation_code},
},
},
HaematologyCoreLinkageDiagnosis => {
InvestigationResultDate => $v->{authorisation_date},
ServiceReportId => $v->{labno},
},
HaematologyCoreDemographics => {
PersonFamilyName => {
family => $v->{last_name},
},
PersonGivenName => {
given => given_name($v),
},
Address => {
UnstructuredAddress => {
streetAddressLine => $v->{address},
},
},
Postcode => {
postalCode => $v->{post_code},
},
Gender => {
code => gender_code($v->{gender}),
},
},
HaematologyCorePathology => {
PathologistConsultantCode => { extension => $v->{reporter_code} },
PathTestReqCareProfCode => { extension => $v->{referrer_code} },
SampleCollectionDate => { extension => $v->{sample_date} },
OrgCodePathReport => { extension => HMDS_ORG_CODE },
ServiceReportStatus => { code => 1 }, # final (complete)
PathTestReqSiteCode => { extension => $v->{organisation_code} },
PathologyReportText => free_text_report($v),
SampleReceiptDate => $v->{request_date},
MorphologySNOMED => icdo3_to_snomed($v->{icdo3}), # ICDO3
# useless for haematology - refers to solid tumours:
# 1 = primary, 4 = region LN, 5 = metastatic, 9 = not known
SpecimenNature => { code => 9 },
# specimen code (maybe multiple) - not sure yet which to use:
# TopographySNOMED => sample_to_snomed($v),
# PrimaryDiagnosisICDPath => sample_to_snomed($v),
},
);
$HAEM{HaematologyCore} = \%h;
}
{ # COSDRecord/Haematology/HeamatologyContent/LaboratoryResults:
my %h = (
HighThroughputSequencing => $v->{high_throughput_sequencing},
Immunocytochemistry => $v->{immunocytochemistry},
FlowCytometry => $v->{flow_cytometry},
Cytogenetics => $v->{cytogenetics},
MicroArray => $v->{micro_array},
Molecular => $v->{molecular},
FISH => $v->{fish},
);
$HAEM{HaematologyContent}{LaboratoryResults} = \%h;
}
$COSDRecord{Haematology} = \%HAEM;
return \%COSDRecord;
}
#-------------------------------------------------------------------------------
# free-text specimen/biopsy_site/morphology/comment, etc:
sub free_text_report {
my $vars = shift;
my @rows;
push @rows, ( 'SPECIMEN(S): ' . $vars->{specimen_decode} );
if ( my $biopsy_site = $vars->{biopsy_site} ) {
push @rows, ( 'ANATOMICAL SITE(S): ' . $biopsy_site );
}
push @rows, ( 'SPECIMEN QUALITY: ' . $vars->{specimen_quality} );
if ( my $gross_desc = $vars->{gross_description} ) {
push @rows, ( 'GROSS DESCRIPTION: ' . $gross_desc );
}
if ( my $morphology = $vars->{morphology} ) {
push @rows, ( 'MORPHOLOGY: ' . $morphology );
}
if ( my $comment = $vars->{comment} ) {
push @rows, ( 'COMMENT: ' . $comment );
}
push @rows, ( 'DIAGNOSIS: ' . $vars->{diagnosis} );
return join "\n", @rows;
}
#-------------------------------------------------------------------------------
# merge vertical table data with $vars - no return needed (working on hashref)
sub combine_data {
my ($vars, $supplimentary_data) = @_; # p $supplimentary_data;
{ # add data-map fields:
# result summaries:
map $vars->{$_} = $supplimentary_data->{result_summary}->{$_},
qw( immunocytochemistry flow_cytometry fish cytogenetics molecular
micro_array high_throughput_sequencing );
# reporter:
map $vars->{$_} = $supplimentary_data->{reporter_data}->{$_},
qw(reporter_name reporter_code);
# specimen code(s) & decode(s):
$vars->{specimen_code}
= $supplimentary_data->{specimen}->{specimen_code};
$vars->{specimen_decode} # not needed ?
= $supplimentary_data->{specimen}->{description};
} # p $vars;
# tidy up (trim content & substitute new-line markers):
map { $vars->{$_} = tidy($vars->{$_}) }
grep $vars->{$_},
qw(comment immunocytochemistry flow_cytometry fish gross_description
micro_array high_throughput_sequencing cytogenetics molecular);
$vars->{gender} ||= 'U'; # p $vars;
return 0; # updating $vars hashref, return not required
}
#-------------------------------------------------------------------------------
sub sample_to_snomed {
my $vars = shift;
my @snomed;
# get info from biopsy_site if available, otherwise specimen code(s):
if ( my $biopsy_site = $vars->{biopsy_site} ) { # optional
my @sites = split '; ', $biopsy_site; # maybe multiple
push @snomed, $anatomical_site_map->{$_} for @sites;
}
else {
my @specimens = split ',', $vars->{specimen_code};
push @snomed, $specimen_topography{$_} for @specimens;
} # p @snomed;
# legacy biopsy site that doesn't match anatomical_sites.site_name:
return { code => 'C80.9' } unless @snomed; # site unknown
return scalar @snomed == 1
? { code => $snomed[0] }
: [ map { code => $_ }, @snomed ];
}
#-------------------------------------------------------------------------------
sub icdo3_to_snomed { # eg 9960/3 -> M99603
local $_ = shift || return undef; # p $icdo3;
m!(\d{4})/(\d)!;
return 'M' . $1 . $2;
}
#-------------------------------------------------------------------------------
sub gender_code {
my $gender = shift;
return 1 if $gender eq 'M';
return 2 if $gender eq 'F';
return 9; # x = not known, 9 = not specified
}
#-------------------------------------------------------------------------------
# first + optional middle names
sub given_name {
my $vars = shift;
return join ' ', grep $_, @{$vars}{ qw/first_name middle_name/ };
}
#-------------------------------------------------------------------------------
sub xs_options {
my %opts = (
SuppressEmpty => 0, # ? only works for xml_in
RootName => 'COSD',
XMLDecl => q!<?xml version="1.0" encoding="UTF-8"?>!,
KeyAttr => [],
NoAttr => 1, # gives inline (scalar) AND nested (arrayref) attributes
);
return %opts;
}
#-------------------------------------------------------------------------------
sub get_request_ids {
my @request_ids = (); # return [ 264710, 204466 ];
{ # new diagnoses:
my $sql = $sql_lib->retr('ncrs_new_diagnosis_request_ids');
my $request_ids = $dbix->query($sql, $duration, $duration)->flat;
push @request_ids, @$request_ids; # p $request_ids;
}
{ # get revised diagnoses:
my $sql = $sql_lib->retr('ncrs_revised_diagnosis_request_ids');
my $request_ids = $dbix->query($sql, $duration, $duration)->flat;
push @request_ids, @$request_ids; # p $request_ids;
}
# combine new & revised diagnosis request ID's; eliminate duplicates:
my %request_ids = map { $_ => 1 } @request_ids; # p %request_ids;
# return unique ID's as arrayref:
return [ keys %request_ids ];
}
#-------------------------------------------------------------------------------
sub transfer_file { # only called if not $JUST_TESTING
my $args = shift;
my $cfg = $settings->{nycris_server};
my %params = (
local_filename => $args->{local_filename},
server_addr => $cfg->{remote_address},
username => $cfg->{username},
password => $cfg->{password},
); # p %params;
# scp file (returns str on failure, undef on success):
my $rtn = $tools->scp_file(\%params); # p $rtn;
my $script = $tools->script_filename;
if ($rtn) {
$tools->mail_admin({ script => $0, msg => $rtn });
warn "$script - $rtn\n"; # dump to logs but don't die!!
}
else {
archive_and_delete_file($args->{local_filename});
my $month = $ref_datetime->month_name; # for cron log:
print "$script reports data for $month succesfully uploaded\n";
}
}
#-------------------------------------------------------------------------------
# archive data file (only called if not $JUST_TESTING):
sub archive_and_delete_file {
my $src_file = shift; # p $src_file;
my $path_to_archive = $settings->{nycris_server}->{path_to_archive};
# make sure it exists otherwise get error trying to tar non-existant file
if (-e $src_file ) { # warn 'it exists';
my $tar_file = sprintf '%s/%s.tar',
$path_to_archive, $data_filename; # p $tar_file;
chdir TMP_DIR; # so we can use relative path for source directory
system( sprintf 'tar -cf %s %s', $tar_file, $data_filename ); # using relative $data_file.* not full path
system( sprintf "gzip $tar_file" ); # compress tar file
# delete source file:
io($src_file)->unlink;
}
}
#-------------------------------------------------------------------------------
sub get_user_code {
my $username = shift;
return $settings->{national_codes}->{$username}
|| 'H9999998'; # OTHER HEALTH CARE PROFESSIONAL
}
#-------------------------------------------------------------------------------
sub get_data_map {
my $request_ids = shift;
my $map = {};
{ # request_specimens:
my $result = do {
my $sql = $sql_lib->retr('nycris_request_specimens'); # p $sql;
$dbix->query($sql, @$request_ids);
};
while ( my $vars = $result->hash ) { # p $vars;
my $request_id = $vars->{request_id};
my %data = (
specimen_code => $vars->{code},
description => $vars->{description},
); # p %data;
$map->{$request_id}->{specimen} = \%data;
}
}
{ # reporter data:
my $result = do {
my $sql = $sql_lib->retr('nycris_reporters'); # p $sql;
$dbix->query($sql, @$request_ids);
};
while ( my $vars = $result->hash ) { # p $vars;
my $request_id = $vars->{request_id};
my $reporter = join ' ',
ucfirst $vars->{first_name},
ucfirst $vars->{last_name};
my $reporter_code = get_user_code($vars->{username});
my %data = (
reporter_name => $reporter,
reporter_code => $reporter_code,
);
$map->{$request_id}->{reporter_data} = \%data;
} # p $map->{reporter_data};
}
{ # result summaries:
my $sql = $sql_lib->retr('nycris_result_summaries');
my $result = $dbix->query($sql, @$request_ids);
while ( my $vars = $result->hash ) { # p $vars;
my $request_id = $vars->{request_id};
my $section = $vars->{section_name};
my $result = $vars->{results_summary};
$section =~ s/ /_/g; # make section suitable for use as hash key
$map->{$request_id}->{result_summary}->{lc $section} = $result;
} # p $map->{result_summary};
}
return $map;
}
#-------------------------------------------------------------------------------
sub tidy {
my $str = shift;
# trim:
$str =~ s/\A\s+//; # leading
$str =~ s/\s+\Z//; # trailing
# $str =~ s/(\r\n)/$new_line_marker/g; # doesn't capture just \n
$str =~ s/\r?\n/$new_line_marker/g; # captures both \r\n & \n
return $str;
}