#!/usr/bin/perl =begin -------------------------------------------------------------------------------- generates NCRS XML data feed for new and revised diagnoses [or all icdo3 '/3' reports authorised or revised - change nycris to ncrs in libray.sql] during previous 1 month SCP file transfer manual command-line: ncrs_cosd.pl [duration in months - optional (defaults to 1)] scp /tmp/ hmds@163.160.247.17: ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !! for new server: need to manually ssh into server once to add key to known_hosts ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ online reference: ICDO3 topographical codes: http://codes.iarc.fr/topography ICDO3 morphological codes: http://codes.iarc.fr/codegroup/2 -------------------------------------------------------------------------------- =cut my $JUST_TESTING = 1; # skips file transfer & file archive use lib '/home/raj/perl5/lib/perl5'; use XML::Simple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here use Data::Printer; use Config::Auto; use Data::Dumper; use Modern::Perl; use DateTime; # for last_day_of_month() use IO::All; use FindBin qw($Bin); # warn $Bin; use lib $Bin . '/../../../lib'; use LIMS::Local::ScriptHelpers; ############################################# my $delimiter = q{|}; # my $duration = $ARGV[0] || 1; # months ago # my $new_line_marker = '[NEW_LINE]'; # ############################################# my $cfg_file = "$Bin/../lib/settings.cfg"; my $settings = Config::Auto::parse($cfg_file); # p $settings; # get tools from LIMS::Local::ScriptHelpers: my $tools = LIMS::Local::ScriptHelpers->new(); my $sql_lib = $tools->sql_lib(); my $config = $tools->config(); my $dbix = $tools->dbix(); # dates ======================================================================== # get date last month - handles BST: my $ref_datetime = $tools->date_subtract(months => $duration); my $timestamp = $tools->time_now->datetime; my $start_date = $ref_datetime->strftime('%Y-%m-01'); my $end_date = DateTime->last_day_of_month( year => $ref_datetime->year, month => $ref_datetime->month, )->ymd; # p $start_date; p $end_date; # ============================================================================== my $data_filename = sprintf 'hmds_%s_%02d.txt', $ref_datetime->year, $ref_datetime->month; # p $data_filename; # $data_filename = 'hmds_test_cosd.xml'; use constant HMDS_ORG_CODE => 'RR813'; # OrgCodePathReport & OrgCodeSubmitter use constant TMP_DIR => '/tmp'; # where to create temp data file # decode for biopsy_site info: my $anatomical_site_map = $dbix->select('anatomical_sites', [qw(site_name snomed)])->map; # p $anatomical_site_map; # for non-histology specimens (sample_code NOT RLIKE '[DGLRX]([BS]L|F|U)'): # NB these are ICDO3 topography codes for site of tumour origin my %specimen_topography = ( BMA => 'C42.1', BMAT => 'C42.1', CF => '', CHIA => 'C42.0', # should never issue new diagnosis on CHIA CHIB => 'C42.0', # should never issue new diagnosis on CHIB CHIM => 'C42.0', # should never issue new diagnosis on CHIM CMP => '', # should never issue new diagnosis on CMP EF => '', # HS => '', # require biopsy_site now # LA => 'C77.9', # require biopsy_site now PB => 'C42.0', SE => '', # never issue new diagnosis on SE TBL => 'C42.1', TBP => 'C42.1', TSL => 'C42.1', URI => '', # never issue new diagnosis on URI # XA => '', # require biopsy_site now RXPB => 'C42.0', # should never issue new diagnosis on RXPB TSM => '', # never issue new diagnosis on TSM ); my @data = (); { # go: # get request ID's for horizontal (1-to-1) & vertical (1-to-many) tables: my $request_ids = get_request_ids(); # p $request_ids; # get request result 1-to-1 data: my $query = $sql_lib->retr('ncrs_data'); # p $query; my $result = $dbix->query($query, @$request_ids); # get data maps of vertical tables - much faster than tying into main query: my $data_map = get_data_map($request_ids); # p $data_map; while ( my $vars = $result->hash ) { # p $vars; next; my $request_id = $vars->{request_id}; # merge vertical table data with $vars: combine_data($vars, $data_map->{$request_id}); p $vars; push @data, $vars; } # p @data; } { # create output file & push to remote server: my $file_path = join '/', TMP_DIR, $data_filename; my $content = to_xml(\@data); # p $content; $content > io($file_path); # transfer file: unless ($JUST_TESTING) { my %args = ( local_filename => $file_path ); transfer_file(\%args); } } #------------------------------------------------------------------------------- sub to_xml { my $data = shift; # arrayref of request datasets my $count = @$data; # options for XMLout (needs array or hash): my %xs_opts = xs_options(); # p %xs_opts; my $xs = XML::Simple->new(%xs_opts); my $ref = { Id => { root => '18DA78B8-44A4-4AA7-95B0-87C198E6B0AE', # where does this come from ? }, OrgCodeSubmitter => { extension => HMDS_ORG_CODE, }, RecordCount => { value => $count, }, ReportingPeriodStartDate => $start_date, ReportingPeriodEndDate => $end_date, FileCreationDateTime => $timestamp, }; push @{ $ref->{COSDRecord} }, format_cosd_unit($_) for @$data; # p $ref; # enclose xml in outer block; add ulisa 'version' inline: # my $input = { add => $vars, version => 2 }; my $xml_out = $xs->XMLout($ref); # p $xml_out; return $xml_out; } #------------------------------------------------------------------------------- # formats repeating COSDRecord blocks: sub format_cosd_unit { my $v = shift; my %COSDRecord = ( Id => { root => 'B63AFD92-568E-4845-A0CE-2643FE15DE08', # where does this come from ? }, ); my %HAEM; { # COSDRecord/Haematology/HaematologyCore: my %h = ( HaematologyCoreLinkagePatientId => { NHSNumber => { extension => $v->{nhs_number}, }, NHSNumberStatusIndicator => { code => '02', # Number present but not traced }, LocalPatientId => $v->{unit_number}, Birthdate => $v->{dob}, OrgCodeOfProvider => { extension => $v->{organisation_code}, }, }, HaematologyCoreLinkageDiagnosis => { InvestigationResultDate => $v->{authorisation_date}, ServiceReportId => $v->{labno}, }, HaematologyCoreDemographics => { PersonFamilyName => { family => $v->{last_name}, }, PersonGivenName => { given => given_name($v), }, Address => { UnstructuredAddress => { streetAddressLine => $v->{address}, }, }, Postcode => { postalCode => $v->{post_code}, }, Gender => { code => gender_code($v->{gender}), }, }, HaematologyCorePathology => { PathologistConsultantCode => { extension => $v->{reporter_code} }, PathTestReqCareProfCode => { extension => $v->{referrer_code} }, SampleCollectionDate => { extension => $v->{sample_date} }, OrgCodePathReport => { extension => HMDS_ORG_CODE }, ServiceReportStatus => { code => 1 }, # final (complete) PathTestReqSiteCode => { extension => $v->{organisation_code} }, PathologyReportText => free_text_report($v), SampleReceiptDate => $v->{request_date}, MorphologySNOMED => icdo3_to_snomed($v->{icdo3}), # ICDO3 # useless for haematology - refers to solid tumours: # 1 = primary, 4 = region LN, 5 = metastatic, 9 = not known SpecimenNature => { code => 9 }, # specimen code (maybe multiple) - not sure yet which to use: # TopographySNOMED => sample_to_snomed($v), # PrimaryDiagnosisICDPath => sample_to_snomed($v), }, ); $HAEM{HaematologyCore} = \%h; } { # COSDRecord/Haematology/HeamatologyContent/LaboratoryResults: my %h = ( HighThroughputSequencing => $v->{high_throughput_sequencing}, Immunocytochemistry => $v->{immunocytochemistry}, FlowCytometry => $v->{flow_cytometry}, Cytogenetics => $v->{cytogenetics}, MicroArray => $v->{micro_array}, Molecular => $v->{molecular}, FISH => $v->{fish}, ); $HAEM{HaematologyContent}{LaboratoryResults} = \%h; } $COSDRecord{Haematology} = \%HAEM; return \%COSDRecord; } #------------------------------------------------------------------------------- # free-text specimen/biopsy_site/morphology/comment, etc: sub free_text_report { my $vars = shift; my @rows; push @rows, ( 'SPECIMEN(S): ' . $vars->{specimen_decode} ); if ( my $biopsy_site = $vars->{biopsy_site} ) { push @rows, ( 'ANATOMICAL SITE(S): ' . $biopsy_site ); } push @rows, ( 'SPECIMEN QUALITY: ' . $vars->{specimen_quality} ); if ( my $gross_desc = $vars->{gross_description} ) { push @rows, ( 'GROSS DESCRIPTION: ' . $gross_desc ); } if ( my $morphology = $vars->{morphology} ) { push @rows, ( 'MORPHOLOGY: ' . $morphology ); } if ( my $comment = $vars->{comment} ) { push @rows, ( 'COMMENT: ' . $comment ); } push @rows, ( 'DIAGNOSIS: ' . $vars->{diagnosis} ); return join "\n", @rows; } #------------------------------------------------------------------------------- # merge vertical table data with $vars - no return needed (working on hashref) sub combine_data { my ($vars, $supplimentary_data) = @_; # p $supplimentary_data; { # add data-map fields: # result summaries: map $vars->{$_} = $supplimentary_data->{result_summary}->{$_}, qw( immunocytochemistry flow_cytometry fish cytogenetics molecular micro_array high_throughput_sequencing ); # reporter: map $vars->{$_} = $supplimentary_data->{reporter_data}->{$_}, qw(reporter_name reporter_code); # specimen code(s) & decode(s): $vars->{specimen_code} = $supplimentary_data->{specimen}->{specimen_code}; $vars->{specimen_decode} # not needed ? = $supplimentary_data->{specimen}->{description}; } # p $vars; # tidy up (trim content & substitute new-line markers): map { $vars->{$_} = tidy($vars->{$_}) } grep $vars->{$_}, qw(comment immunocytochemistry flow_cytometry fish gross_description micro_array high_throughput_sequencing cytogenetics molecular); $vars->{gender} ||= 'U'; # p $vars; return 0; # updating $vars hashref, return not required } #------------------------------------------------------------------------------- sub sample_to_snomed { my $vars = shift; my @snomed; # get info from biopsy_site if available, otherwise specimen code(s): if ( my $biopsy_site = $vars->{biopsy_site} ) { # optional my @sites = split '; ', $biopsy_site; # maybe multiple push @snomed, $anatomical_site_map->{$_} for @sites; } else { my @specimens = split ',', $vars->{specimen_code}; push @snomed, $specimen_topography{$_} for @specimens; } # p @snomed; # legacy biopsy site that doesn't match anatomical_sites.site_name: return { code => 'C80.9' } unless @snomed; # site unknown return scalar @snomed == 1 ? { code => $snomed[0] } : [ map { code => $_ }, @snomed ]; } #------------------------------------------------------------------------------- sub icdo3_to_snomed { # eg 9960/3 -> M99603 local $_ = shift || return undef; # p $icdo3; m!(\d{4})/(\d)!; return 'M' . $1 . $2; } #------------------------------------------------------------------------------- sub gender_code { my $gender = shift; return 1 if $gender eq 'M'; return 2 if $gender eq 'F'; return 9; # x = not known, 9 = not specified } #------------------------------------------------------------------------------- # first + optional middle names sub given_name { my $vars = shift; return join ' ', grep $_, @{$vars}{ qw/first_name middle_name/ }; } #------------------------------------------------------------------------------- sub xs_options { my %opts = ( SuppressEmpty => 0, # ? only works for xml_in RootName => 'COSD', XMLDecl => q!!, KeyAttr => [], NoAttr => 1, # gives inline (scalar) AND nested (arrayref) attributes ); return %opts; } #------------------------------------------------------------------------------- sub get_request_ids { my @request_ids = (); # return [ 264710, 204466 ]; { # new diagnoses: my $sql = $sql_lib->retr('ncrs_new_diagnosis_request_ids'); my $request_ids = $dbix->query($sql, $duration, $duration)->flat; push @request_ids, @$request_ids; # p $request_ids; } { # get revised diagnoses: my $sql = $sql_lib->retr('ncrs_revised_diagnosis_request_ids'); my $request_ids = $dbix->query($sql, $duration, $duration)->flat; push @request_ids, @$request_ids; # p $request_ids; } # combine new & revised diagnosis request ID's; eliminate duplicates: my %request_ids = map { $_ => 1 } @request_ids; # p %request_ids; # return unique ID's as arrayref: return [ keys %request_ids ]; } #------------------------------------------------------------------------------- sub transfer_file { # only called if not $JUST_TESTING my $args = shift; my $cfg = $settings->{nycris_server}; my %params = ( local_filename => $args->{local_filename}, server_addr => $cfg->{remote_address}, username => $cfg->{username}, password => $cfg->{password}, ); # p %params; # scp file (returns str on failure, undef on success): my $rtn = $tools->scp_file(\%params); # p $rtn; my $script = $tools->script_filename; if ($rtn) { $tools->mail_admin({ script => $0, msg => $rtn }); warn "$script - $rtn\n"; # dump to logs but don't die!! } else { archive_and_delete_file($args->{local_filename}); my $month = $ref_datetime->month_name; # for cron log: print "$script reports data for $month succesfully uploaded\n"; } } #------------------------------------------------------------------------------- # archive data file (only called if not $JUST_TESTING): sub archive_and_delete_file { my $src_file = shift; # p $src_file; my $path_to_archive = $settings->{nycris_server}->{path_to_archive}; # make sure it exists otherwise get error trying to tar non-existant file if (-e $src_file ) { # warn 'it exists'; my $tar_file = sprintf '%s/%s.tar', $path_to_archive, $data_filename; # p $tar_file; chdir TMP_DIR; # so we can use relative path for source directory system( sprintf 'tar -cf %s %s', $tar_file, $data_filename ); # using relative $data_file.* not full path system( sprintf "gzip $tar_file" ); # compress tar file # delete source file: io($src_file)->unlink; } } #------------------------------------------------------------------------------- sub get_user_code { my $username = shift; return $settings->{national_codes}->{$username} || 'H9999998'; # OTHER HEALTH CARE PROFESSIONAL } #------------------------------------------------------------------------------- sub get_data_map { my $request_ids = shift; my $map = {}; { # request_specimens: my $result = do { my $sql = $sql_lib->retr('nycris_request_specimens'); # p $sql; $dbix->query($sql, @$request_ids); }; while ( my $vars = $result->hash ) { # p $vars; my $request_id = $vars->{request_id}; my %data = ( specimen_code => $vars->{code}, description => $vars->{description}, ); # p %data; $map->{$request_id}->{specimen} = \%data; } } { # reporter data: my $result = do { my $sql = $sql_lib->retr('nycris_reporters'); # p $sql; $dbix->query($sql, @$request_ids); }; while ( my $vars = $result->hash ) { # p $vars; my $request_id = $vars->{request_id}; my $reporter = join ' ', ucfirst $vars->{first_name}, ucfirst $vars->{last_name}; my $reporter_code = get_user_code($vars->{username}); my %data = ( reporter_name => $reporter, reporter_code => $reporter_code, ); $map->{$request_id}->{reporter_data} = \%data; } # p $map->{reporter_data}; } { # result summaries: my $sql = $sql_lib->retr('nycris_result_summaries'); my $result = $dbix->query($sql, @$request_ids); while ( my $vars = $result->hash ) { # p $vars; my $request_id = $vars->{request_id}; my $section = $vars->{section_name}; my $result = $vars->{results_summary}; $section =~ s/ /_/g; # make section suitable for use as hash key $map->{$request_id}->{result_summary}->{lc $section} = $result; } # p $map->{result_summary}; } return $map; } #------------------------------------------------------------------------------- sub tidy { my $str = shift; # trim: $str =~ s/\A\s+//; # leading $str =~ s/\s+\Z//; # trailing # $str =~ s/(\r\n)/$new_line_marker/g; # doesn't capture just \n $str =~ s/\r?\n/$new_line_marker/g; # captures both \r\n & \n return $str; }