#!/usr/bin/env perl =begin -------------------------------------------------------------------------------- generates NCRS XML data file for all authorised requests with diagnosis either ICDO3 '/3', or 'see comments' with a non-null comment section, where authorisation date or diagnosis revision date during previous 1 month test mode skips file transfer & archive, dumps data to file (/tmp dir) * results summaries combined with PathologyReportText until schema evolves to accomodate them, then will need to revert as_cdata($result) & to_camel_case() usage: CENTRE= $0 [-m - optional (defaults to 1)][-t - testing] # scp /tmp/ hmds@163.160.247.17: # broken ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ !! for new server: need to manually ssh into server once to add key to known_hosts ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ online reference: Using ICD-O-3 online: http://codes.iarc.fr/usingicdo.php ICDO3 topographical codes: http://codes.iarc.fr/topography ICDO3 morphological codes: http://codes.iarc.fr/codegroup/2 ICD-O-3 to ICD-10 conversion for hemato-oncological diseases: All leukemias have a site-specific code of bone marrow (C42.1) except myeloid sarcoma Multiple myeloma – code site to C42.1 RULE D. Topography codes for lymphomas: If a lymphoma involves multiple lymph node regions, code to C77.8 (lymph nodes of multiple regions). Code extranodal lymphomas to the site of origin, which may not be the site of the biopsy. If no site is indicated for a lymphoma, code to C77.9 (lymph node, NOS). C80.9 if suspected to be extranodal but site is not stated. -------------------------------------------------------------------------------- =cut use Getopt::Std; getopts('m:tq'); # months, testing, sql-trace our($opt_m,$opt_t,$opt_q); # warn $opt_m; exit; use strict; use warnings; use lib '/home/raj/perl5/lib/perl5'; # File::Spec 3.40 required in a ScriptHelpers dependency, so load before charnames # or cron loads system File::Spec 3.33: use File::Spec 3.40; use charnames ':full'; # for perl < 5.16 - to use unicode names (eg LINE FEED) use Encoding::FixLatin qw/fix_latin/; use IPC::System::Simple qw(capture); use Data::GUID qw(guid_string); use XML::Simple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here use XML::SAX::ParserFactory; use XML::Validator::Schema; use XML::Compile::Schema; use SQL::Abstract::More; use Data::Printer; use Config::Auto; use Modern::Perl; use XML::LibXML; use Try::Tiny; use Template; use DateTime; # last_day_of_month() use IO::All -utf8; # fix for "Wide character in print" use FindBin qw($Bin); # warn $Bin; use lib $Bin . '/../../../lib'; use LIMS::Local::ScriptHelpers; use lib '/home/raj/perl-lib'; use Local::DB; # switch off date auto-inflation: $Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; my $centre = $ENV{CENTRE} || die 'no centre param supplied'; #=============================================================================== my $JUST_TESTING = $opt_t || 0; # skips file transfer & file archive my $duration = $opt_m || 1; # months ago my $new_line_marker = '[~~]'; # using CDATA now #=============================================================================== $ENV{SQL_TRACE} = $opt_q; # supply 'SuppressEmpty' arg (1 or undef) to XMLOut: --------------------------- =begin # SuppressEmpty => 1:
# SuppressEmpty => undef:
=cut my $suppress_empty = 1; # options 1 or undef to suppress warning, 0 gives warning #------------------------------------------------------------------------------- my $cfg_file = "$Bin/../lib/settings.cfg"; my $settings = Config::Auto::parse($cfg_file); # p $settings; my $ncrs_cfg = $settings->{ncrs_data}->{$centre} || die "can't find settings for $centre in $cfg_file"; # p $ncrs_cfg; # organisation code where service (HMDS, NBT, UCLH, etc) located: my $service_org_code = $ncrs_cfg->{org_code} || die 'service org code not configured'; my $hmds_org_code = 'RR8F4'; # apparently # get tools from LIMS::Local::ScriptHelpers: my $tools = LIMS::Local::ScriptHelpers->new(); my $config = $tools->config(); my $dbname = $centre eq 'leeds' ? 'hilis4' : $centre; my $dbix = Local::DB->dbix({ dbname => $dbname }); # warn $dbix->dbh->{Name}; # schema definition for validation and XML::Compile::Schema/XML::LibXML method: my $xsd = '/home/raj/apps/HILIS4/setup/schema/xsd/ncrs.xsd'; # dates ======================================================================== my $timestamp = $tools->time_now->datetime; # get date last month - handles BST: my $ref_date = $tools->date_subtract(months => $duration); # format for ReportingPeriodStartDate & ReportingPeriodEndDate my $start_date = $ref_date->strftime('%Y-%m-01'); my $end_date = DateTime->last_day_of_month( year => $ref_date->year, month => $ref_date->month, )->ymd; # p $start_date; p $end_date; # ============================================================================== my @lab_sections = $dbix->select('lab_sections','section_name')->column; # remove non-word chars to match result_summary data from get_data_map(): $_ = to_camel_case($_) for @lab_sections; # p @lab_sections; exit; # The submission file must be named using the following convention: # COSD____ # _.xml my $data_filename = sprintf 'COSD_PATH_%s_%s_%s.xml', $service_org_code, $start_date, $end_date; # p $data_filename; # $data_filename = 'hmds_test_cosd.xml'; my @skip_screens = ( # not very maintainable as new ones added!! 'Inappropriate/unsuitable sample', 'Inappropriate MPN request', 'Rituximab (RA) monitoring', 'Follow-up CML (PB)', 'Follow-up CML (BM)', 'Chimerism (CHIA/CHIB)', 'Chimerism (CHIM)', 'HIV monitoring', # discontinued Feb/2016 'NCG PNH (PB)', # discontinued 03/2019 'PNH NCG (PB)', # new term 03/2019 'NCG PNH (BM)', 'Outreach', ); # exclude anonymised clinical trials: my @anonymised_trials = ( 'HTG RCODOXM/IVAC', 'Archigen AGB002', 'AUTOLUS AUTO2-MM1', 'NCRI INCA', 'PenCTU ENRICH', 'Roche Gallium', 'Unknown/other' # some have anonymised names ); use constant TMP_DIR => '/tmp'; # where to create temp data file my %all_guids; # GUID counter to ensure uniqueness my @data = (); # global !! { # go: # get requests 1-to-1 data: my @requests = do { my ($sql, @bind) = _ncrs_data(); $dbix->query($sql, @bind)->hashes; }; # p @requests; exit; my @request_ids = map $_->{request_id}, @requests; # p @request_ids; # get data maps of vertical tables - much faster than tying into main query: my $data_map = get_data_map(\@request_ids); # p $data_map; for my $r (@requests) { # p $r; next; my $request_id = $r->{request_id}; # merge vertical table data with $r: my $vars = combine_data($r, $data_map->{$request_id}); push @data, $vars; } # p @data; } # enable hash counter in new_guid() for debugging: say "$_ is not unique" for grep { $all_guids{$_} > 1 } keys %all_guids; { # create output file & push to remote server: my $file_path = join '/', TMP_DIR, $data_filename; # generate xml (validation failure warns error & returns 0): if ( my $content = to_xml() ) { # p $content; $content > io($file_path); # transfer file: unless ($JUST_TESTING) { my %args = ( local_filename => $file_path ); transfer_file(\%args); } } } #------------------------------------------------------------------------------- sub to_xml { my $count = @data; # p $count; my $ref = { OrganisationIdentifierCodeOfSubmittingOrganisation => { # always Leeds/HMDS: extension => $hmds_org_code, }, Id => { root => new_guid() }, # returns new Data::GUID as_string RecordCount => { value => $count }, ReportingPeriodStartDate => $start_date, ReportingPeriodEndDate => $end_date, FileCreationDateTime => $timestamp, }; # p $ref; my $xml_out = #xml_simple($ref); # XML::Simple method #xml_libxml($ref); # XML::LibXML method xml_template($ref); # TT method (provides easiest layout control) # hack to add namespace data (don't need this for TT method): # chomp( my $namespace = namespace_data() ); # remove trailing new-line # $xml_out =~ s/(COSD:COSD)/$1\n$namespace/; # don't use 'g', only want start-tag # p $xml_out; { # validate xml: # switch debug on to show data structure: my $validator = XML::Validator::Schema->new(file => $xsd, debug => 0); # p $validator; my $parser = XML::SAX::ParserFactory->parser(Handler => $validator); my $err; try { $parser->parse_string($xml_out) } catch { $err = $_ }; if ($err) { warn $err; return 0; } } return $xml_out; } #------------------------------------------------------------------------------- # formats repeating COSDRecord blocks: sub format_cosd_unit { my $v = shift; my %COSDRecord = ( Id => { root => new_guid(), # returns new Data::GUID as_string }, ); my %HAEM; { # COSDRecord/Haematology/HaematologyCore: my $labno = $ncrs_cfg->{labno_prefix} . $v->{request_number} . '/' . ( $v->{year} - 2000 ); # NHSNumberStatusIndicator = "Number present but not traced (02)" # OR "Number not present and trace not required (07)": my $NHSNumberStatusIndicator = defined $v->{nhs_number} ? '02' : '07'; my %h = ( HaematologicalCoreLinkagePatientId => { NHSNumber => { extension => $v->{nhs_number}, }, NHSNumberStatusIndicator => { code => $NHSNumberStatusIndicator, }, LocalPatientIdExtended => $v->{unit_number}, Birthdate => $v->{dob}, OrganisationIdentifierCodeOfProvider => { extension => $v->{organisation_code}, }, }, HaematologicalCoreDemographics => { PersonFamilyName => { family => $v->{last_name}, }, PersonGivenName => { given => given_name($v), }, Address => { UnstructuredAddress => { streetAddressLine => $v->{address}, }, }, Postcode => { postalCode => $v->{post_code}, }, Gender => { code => gender_code($v->{gender}), }, }, HaematologicalCorePathology => { OrganisationSiteIdentifierOfPathologyTestRequest => { extension => $v->{organisation_code} }, OrganisationIdentifierOfReportingPathologist => { extension => $service_org_code }, PathologistConsultantCode => { extension => to_gmc($v->{reporter_code}) }, PathTestReqCareProfCode => { extension => to_gmc($v->{referrer_code}) }, MorphologySNOMEDPathology => { # ICDO3 code => icdo3_to_snomed($v->{icdo3}) }, # could also use: # # ServiceReportStatus => { code => report_status($v->{diagnosis}) }, InvestigationResultDate => $v->{authorisation_date}, PathologyReportText => free_text_report($v), SampleCollectionDate => $v->{sample_date}, # SampleReceiptDate => $v->{request_date}, # moved below ... ServiceReportId => { extension => $labno }, # morphology code if needed is 3 or 1 (from ICDO3 /3 or /1 suffix) # useless for haematology - refers to solid tumours: # 1 = primary, 4 = region LN, 5 = metastatic, 9 = not known # SpecimenNature => { code => 9 }, # One of TOPOGRAPHY (SNOMED), TOPOGRAPHY (SNOMED CT) or PRIMARY # DIAGNOSIS (ICD PATHOLOGICAL) is required for the schema. It is # expected that most submissions will include TOPOGRAPHY (SNOMED). # PRIMARY DIAGNOSIS (ICD PATHOLOGICAL) is the PRIMARY DIAGNOSIS # based on the evidence from a pathological examination. Format # CXX.X or DXX.X # PrimaryDiagnosisICDPath => sample_to_snomed($v), # topography code (primary site of origin): suspended # TopographySNOMEDPathology => { code => topography_code($v) }, }, ); # only require one of SampleCollectionDate / SampleReceiptDate: unless ( $v->{sample_date} ) { my $ReceiptDate = $v->{request_date}; $h{HaematologicalCorePathology}{SampleReceiptDate} = $ReceiptDate; } $HAEM{HaematologicalCore} = \%h; } # lab results not part of v8 schema (temporarily combined with PathologyReportText): # { # COSDRecord/Haematology/HaematologyContent/LaboratoryResults: # my %h = map +( $_ => $v->{$_} ), @lab_sections; # $HAEM{HaematologicalContent}{LaboratoryResults} = \%h; # } $COSDRecord{Haematological} = \%HAEM; return \%COSDRecord; } =begin sub xml_simple { # XML::Simple (doesn't support ordered elements without hack) my $ref = shift; # options for XMLout (needs array or hash): my %xs_opts = xs_options(); # p %xs_opts; my $xs = XML::Simple->new(%xs_opts); my $xml_out = $xs->XMLout($ref); # p $xml_out; { # get repeating COSDRecords (horrible hack because XML::Simple doesn't # support ordered elements, create 2nd xml document and splice into outer one: my %xs_opts = xs_options(); # p %xs_opts; $xs_opts{RootName} = undef; # suppress, don't want outer xml element delete $xs_opts{XMLDecl}; # delete, don't need for COSDRecord elements my $xs = XML::Simple->new(%xs_opts); # p %xs_opts; my @COSDRecords = map format_cosd_unit($_), @data; # p @COSDRecords; my $records = $xs->XMLout({ COSDRecord => \@COSDRecords }); # p $records; # splice in cosd records: $xml_out =~ s{()}{$records . $1}e; } return $xml_out } sub xml_libxml { # XML::LibXML method my $ref = shift; push @{ $ref->{COSDRecord} }, format_cosd_unit($_) for @data; # p $ref; # schema created from dummy xml by https://www.freeformatter.com/xsd-generator.html#ad-output # had to change COSD:COSD in xml to COSD or outer element skipped: my $schema = XML::Compile::Schema->new($xsd); # p $schema; # see what types are defined # $schema->printIndex; # exit; # p $schema->template('PERL' => 'COSD'); exit; # create and use a writer my $write = $schema->compile(WRITER => 'COSD'); my $doc = XML::LibXML::Document->new('1.0', 'UTF-8'); my $xml = $write->($doc, $ref); $doc->setDocumentElement($xml); # show result my $xml_out = $doc->toString(1); # change COSD to COSD:COSD (as colon not supported in xsd): # "error: field name contains 'COSD:COSD' which is not a valid NCName at COSD:COSD" $xml_out =~ s/COSD(?!Record)/COSD:COSD/g; # match but not return $xml_out; } =cut sub xml_template { # TT method: my $ref = shift; push @{ $ref->{COSDRecord} }, format_cosd_unit($_) for @data; my %args = ( INCLUDE_PATH => ['/home/raj/apps/HILIS4/templates/cron'], # ENCODING => 'utf8', # not sure it's needed ); my $xml_out; my $tt = Template->new(\%args); $tt->process('ncrs_cosd.tt', { data => $ref }, \$xml_out) or die $tt->error(); return $xml_out; } #------------------------------------------------------------------------------- sub report_status { # 1 = Final (Complete); 2 = Preliminary (Interim) my $diagnosis = shift; # p $diagnosis; my @interim_terms = ( 'awaiting final (diagnosis|review)', 'interim result', # NBT ); return ( grep { lc $diagnosis =~ /^$_/ } @interim_terms ) ? 2 : 1; } #------------------------------------------------------------------------------- # free-text specimen/biopsy_site/morphology/comment, etc: sub free_text_report { my $vars = shift; # p $vars; my @rows; push @rows, ( 'SPECIMEN(S): ' . $vars->{specimen_decode} ); if ( my $biopsy_site = $vars->{biopsy_site} ) { push @rows, ( 'ANATOMICAL SITE(S): ' . $biopsy_site ); } push @rows, ( 'SPECIMEN QUALITY: ' . $vars->{specimen_quality} ); if ( my $gross_desc = $vars->{gross_description} ) { push @rows, ( 'GROSS DESCRIPTION: ' . $gross_desc ); } if ( my $details = $vars->{clinical_details} ) { push @rows, ( 'CLINICAL DETAILS: ' . $details ); } for my $section (@lab_sections) { # p $section; # results summaries: my $summary = $vars->{$section} || next; # p $summary; # next unless $section eq 'haematology'; push @rows, ( join ': ', uc $section, $summary . "\n" ); } if ( my $morphology = $vars->{morphology} ) { push @rows, ( 'MORPHOLOGY: ' . $morphology ); } if ( my $comment = $vars->{comment} ) { push @rows, ( 'COMMENT: ' . $comment ); } push @rows, ( 'DIAGNOSIS: ' . $vars->{diagnosis} ); push @rows, ( 'REPORTED BY: ' . $vars->{reporter_name} . ' on ' . $vars->{report_datetime} ); my $str = join "\n", @rows; # convert all non-utf8 chars (eg pasting from Word): my $utf8_text = fix_latin($str); # enclose text in CDATA tags: return as_cdata($utf8_text); } #------------------------------------------------------------------------------- # merge vertical table data with $vars sub combine_data { my ($vars, $supplimentary_data) = @_; # p $supplimentary_data; { # add data-map fields: # result summaries (enclose text in CDATA tags): for (@lab_sections) { my $result = $supplimentary_data->{result_summary}->{$_} || next; # can't be '0' # temporarily combined with PathologyReportText so doesn't need to be CDATA: $vars->{$_} = $result; # as_cdata($result); } # reporter: $vars->{$_} = $supplimentary_data->{reporter_data}->{$_}, for qw(reporter_name reporter_code report_datetime); # specimen code(s) & decode(s): $vars->{specimen_code} = $supplimentary_data->{specimen}->{specimen_code}; $vars->{specimen_decode} = $supplimentary_data->{specimen}->{description}; } # p $vars; $vars->{gender} ||= 'U'; # p $vars; return $vars; } #------------------------------------------------------------------------------- sub topography_code { my $vars = shift; # C80.9 / Unknown primary site: return 'C80.9' unless $vars->{icdo3}; my ($icdo3) = $vars->{icdo3} =~ /^(\d{4})/; # capture 1st 4 digits; # plasmacytomas? CLL? EMZL? - check the MZL & MYD88-mutated entries!! my %topo_code = ( 8720 => 'C44._', # metastatic melanoma 9689 => 'C42.2', # splenic MZL 9699 => 'C80.9', # extranodal MZL (Rule D) 9700 => 'C44._', # MF 9709 => 'C44._', # cuteneous TCL 9718 => 'C44._', # primary cuteneous CD30+ LPD # 9731 => 'C40._', # plasmacytoma (only of bone; C40 is bones, joints, etc) 9732 => 'C42.1', # myeloma 9733 => 'C42.1', # plasma-cell leukaemia ); return $topo_code{$icdo3} if $topo_code{$icdo3}; # primary non-haematological tumour/metastatic carcinoma: return 'C80.9' if in_num($icdo3, 8000, 8010); # rhabdomyosarcoma/pnet/neuroblastoma: return 'C80.9' if in_num($icdo3, 8900, 9260, 9500); # mastocytosis, histiocytosis, langerhans: return 'C80.9' if between($icdo3, 9741 => 9755); # lymphomatoid granulomatosis, amyloidosis: return 'C80.9' if between($icdo3, 9766 => 9769); # Rule E: all leukaemias (9801/3 .. 9989/3): return 'C42.1' if between($icdo3, 9801 => 9990); # Rule D: default for lymphomas (9591/3 .. 9739/3) return 'C77._' if between($icdo3, 9591 => 9739); # escaped all above returns so output for debugging: p $vars->{diagnosis}; } #------------------------------------------------------------------------------- sub between { # adapated from Acme::Tools my ($test, $lower, $upper) = @_; return $lower < $upper ? $test >= $lower && $test <= $upper : $test >= $upper && $test <= $lower; } #------------------------------------------------------------------------------- sub in_num { # lifted from Acme::Tools my $val = shift || return 0; # shift 1st val, remaining in @_ for (@_) { return 1 if $_ == $val } return 0; } #------------------------------------------------------------------------------- sub icdo3_to_snomed { # eg 9960/3 -> M99603 local $_ = shift || return undef; # p $icdo3; s{/}{}; # remove slash return 'M' . $_; } #------------------------------------------------------------------------------- sub gender_code { my $gender = shift; return 1 if lc $gender eq 'm'; return 2 if lc $gender eq 'f'; return 9; # x = not known, 9 = not specified } #------------------------------------------------------------------------------- # first + optional middle names sub given_name { my $vars = shift; return join ' ', grep $_, @{$vars}{ qw/first_name middle_name/ }; } #------------------------------------------------------------------------------- sub xs_options { my %opts = ( SuppressEmpty => $suppress_empty, RootName => 'COSD:COSD', NoEscape => 1, # to preserve tags XMLDecl => q!!, KeyAttr => [], NoAttr => 1, # gives inline (scalar) AND nested (arrayref) attributes ); return %opts; } #------------------------------------------------------------------------------- # sub transfer_file { # only called if not $JUST_TESTING my $args = shift; =begin scp function broken, replaced by email 07/2016 my $cfg = $settings->{nycris_server}; my %params = ( local_filename => $args->{local_filename}, server_addr => $cfg->{remote_address}, username => $cfg->{username}, password => $cfg->{password}, ); # p %params; # scp file (returns str on failure, undef on success): # my $rtn = $tools->scp_file(\%params); =cut my $rtn = email_file(); # from 05/07/2016 my $script = $tools->script_filename; if ($rtn) { $tools->mail_admin({ script => $0, msg => $rtn }); warn "$script - $rtn\n"; # dump to logs but don't die!! } else { archive_and_delete_file($args->{local_filename}); my $month = $ref_date->month_name; # for cron log: print "$script reports $centre data for $month successfully dispatched\n"; } } #------------------------------------------------------------------------------- sub email_file { # email a password-protected file: # $data_filename too long for use as subject - capture org-code_year-month: my ($subject) = $data_filename =~ /(COSD_PATH_\w{5}_\d{4}-\d{2})/; # create file id from organisation code & date, for use as zip filename & pwd: my ($file_id) = $subject =~ /(\w{5}_\d{4}-\d{2})/; # p $file_id; my $password = ucfirst lc $file_id; # p $password; my $zip_filename = lc $file_id . '.zip'; # p $zip_filename; my @cmd = ('/usr/bin/zip'); push @cmd, '-q'; # quiet push @cmd, '-j'; # junk dir name push @cmd, ( '--password', $password ); # -------------------------------------------------------------------------- push @cmd, '-'; # only needed to allow piping of content if using capture() # -------------------------------------------------------------------------- push @cmd, TMP_DIR . '/' . $zip_filename; push @cmd, TMP_DIR . '/' . $data_filename; # p @cmd; # override local centre service email with HMDS nhs.net address: $config->{service_email} = 'hmds.lth@nhs.uk'; # p $config; =begin # method to save zip file to disk & then use io() for $content: # system(@cmd); # my $content = io->file(TMP_DIR . '/' . $zip_filename)->all; =cut # use IPC::System::Simple to capture zip output: my $content = capture(@cmd); my %mail = ( config => $config, subject => $subject, filename => $zip_filename, attachment => $content, ); # p %mail; # p @recipients; my $ok = $tools->send_mail(\%mail, [ $ncrs_cfg->{recipient} ]); # returns 0 if msg fails # for compatibility with scp_file(): return 'message(s) failed to send' if not $ok; return 0; # don't return true unless msg fails } #------------------------------------------------------------------------------- # archive data file (only called if not $JUST_TESTING): sub archive_and_delete_file { my $src_file = shift; # p $src_file; my $path_to_archive = $settings->{nycris_server}->{path_to_archive}; # $data_filename too long - capture org-code_year-month: my ($archive_filename) = $data_filename =~ /COSD_PATH_(\w{5}_\d{4}-\d{2})/; # make sure it exists otherwise get error trying to tar non-existant file if (-e $src_file ) { # warn 'it exists'; my $tar_file = sprintf '%s/%s.tar', $path_to_archive, $archive_filename; # p $tar_file; chdir TMP_DIR; # so we can use relative path for source directory system( sprintf 'tar -cf %s %s', $tar_file, $data_filename ); # using relative $data_file.* not full path system( sprintf "gzip $tar_file" ); # compress tar file # delete source file: io($src_file)->unlink; } } #------------------------------------------------------------------------------- sub get_data_map { my $request_ids = shift; # p $request_ids; my $map = {}; { # request_specimens: my $result = do { my ($sql, @bind) = _request_specimens($request_ids); $dbix->query($sql, @bind); }; while ( my $vars = $result->hash ) { # p $vars; my $request_id = $vars->{request_id}; my %data = ( specimen_code => $vars->{code}, description => $vars->{description}, ); # p %data; $map->{$request_id}->{specimen} = \%data; } } { # reporter data: my $result = do { my ($sql, @bind) = _reporters($request_ids); $dbix->query($sql, @bind); }; while ( my $vars = $result->hash ) { # p $vars; my $request_id = $vars->{request_id}; my $reporter = join ' ', ucfirst $vars->{first_name}, ucfirst $vars->{last_name}; my $reporter_code = $vars->{registration_number} || 'H9999998'; # OTHER HEALTH CARE PROFESSIONAL; my %data = ( reporter_name => $reporter, reporter_code => $reporter_code, report_datetime => $vars->{report_datetime}, ); # p %data; $map->{$request_id}->{reporter_data} = \%data; } } { # result summaries: my $result = do { my ($sql, @bind) = _result_summaries($request_ids); $dbix->query($sql, @bind); }; while ( my $vars = $result->hash ) { # p $vars; my $request_id = $vars->{request_id}; my $section = to_camel_case($vars->{section_name}); my $result = fix_latin($vars->{results_summary}); $map->{$request_id}->{result_summary}->{$section} = $result; } # p $map->{result_summary}; } # p $map; return $map; } #------------------------------------------------------------------------------- sub tidy { # not in use, replaced by as_cdata() my $str = shift; # p $str; # trim: $str =~ s/\A\s+//; # leading $str =~ s/\s+\Z//; # trailing #$str =~ s/\r?\n/$new_line_marker/g; # captures both \r\n & \n $str =~ s/\N{CARRIAGE RETURN}?\N{LINE FEED}/$new_line_marker/g; } #------------------------------------------------------------------------------- # need to use XML::Simple opt NoEscape => 1 to preserve CDATA tags: sub as_cdata { my $str = shift; # 1) for XML::Simple # return ""; # 2) for XML::LibXML and Template methods: my $cdata = XML::LibXML::CDATASection->new($str); # p $cdata->textContent; return $cdata; } #------------------------------------------------------------------------------- sub to_camel_case { my $ref = shift; # p $ref; $ref =~ s/_/ /g; # convert underscores to spaces for camelCasing # now temporarily combined with PathologyReportText so don't need this yet: # $ref = join '', map { ucfirst $_ } split '\W', $ref; # p $ref; return $ref; } #------------------------------------------------------------------------------- sub to_gmc { # only GMC codes permitted (CS codes under consideration): my $reg_num = shift; # $reg_num = 'H9999998' unless $reg_num =~ /^[CH]\d{7}$/; # handled at PHE now return $reg_num; } #------------------------------------------------------------------------------- sub namespace_data { # TODO: confirm uri's return <{report_status}; my @cols = ( qw/ DISTINCT(r.id)|request_id DATE(r.created_at)|request_date r.request_number r.year p.last_name p.first_name p.middle_name p.nhs_number pc.unit_number p.dob p.gender pd.address pd.post_code ref.national_code|referrer_code rs.organisation_code rr.biopsy_site rr.gross_description rr.specimen_quality DATE(rr.specimen_date)|sample_date rr.clinical_details rr.morphology rr.comment d.name|diagnosis d.icdo3 DATE(rh.time)|authorisation_date / ); my @rels = ( 'requests|r' => q{r.patient_case_id=pc.id} , 'patient_case|pc' => q{pc.patient_id=p.id} , 'patients|p' => q{r.referrer_department_id=rd.id} , 'referrer_department|rd' => q{rd.referrer_id=ref.id} , 'referrers|ref' => q{pc.referral_source_id=rs.id} , 'referral_sources|rs' => q{rh.request_id=r.id} , 'request_history|rh' => q{rr.request_id=r.id} , 'request_report_view|rr' => q{rr.diagnosis_id=d.id} , 'diagnoses|d' => q{ris.request_id=r.id} , 'request_initial_screen|ris' => q{ris.screen_id=s.id} , 'screens|s' => q{=>rdh.request_id=r.id} , 'request_diagnosis_history|rdh' => q{=>pd.patient_id=p.id} , 'patient_demographics|pd' => q{=>rt.request_id=r.id} , 'request_trial|rt' => q{=>rt.trial_id=ct.id} , 'clinical_trials|ct' ); # local_network_locations: push @rels, ( 'lnl.parent_id=rs.parent_organisation_id' => 'local_network_locations|lnl' ) if $centre eq 'leeds'; my %where = ( -and => [ { # is reported/authorised & not excluded screens or [CN]EQAS cases: # 'p.first_name' => { -not_like => '_' }, # some are valid records -and => [ 'p.last_name' => { -not_rlike => '[cn]qas' }, 'p.last_name' => { -not_like => '_' }, ], 'rh.action' => $report_status, 's.description' => { -not_in => \@skip_screens }, # 'r.id' => { -in => [353199, 353081] }, # force specific requests }, -or => [ # reported/authorised or diagnosis history dates: { 'MONTH(rh.time)' => $ref_date->month, 'YEAR(rh.time)' => $ref_date->year, }, { 'MONTH(rdh.time)' => $ref_date->month, 'YEAR(rdh.time)' => $ref_date->year, }, ], #=============================================================================== # all requests with ICDO3 /3 diagnosis: 'd.icdo3' => { -like => '%3' }, # any request (with/without icdo3, or clinical trial WITH icdo3): # -or => [ # all non-trial cases OR clinical trial with ICDO3 diagnosis # 'rt.request_id' => undef, # 'd.icdo3' => { -not => undef }, # ], # diagnosis not MGUS, or is 'see coments' with rr.comment - only # makes sense if combined with "d.icdo => { -not undef }" # -or => [ # 'd.name' => { '-not_rlike' => 'MGUS' }, # { # 'd.name' => 'see comments', # 'rr.comment' => { '!=' => undef } # }, # ], #=============================================================================== -or => [ # if trial, not an anomimised one 'ct.trial_name' => undef, 'ct.trial_name' => { -not_in => \@anonymised_trials }, ], -not => { # diagnosis = 'see comments' and comment is null 'rr.comment' => undef, 'd.name' => 'see comments', }, ], ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, -order_by => 'r.id', # -limit => 1, # for testing # -offset => 15, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; # $dbix->dump_query($sql, @bind); exit; return ($sql, @bind); } sub _request_specimens { my $request_ids = shift; my @cols = ( 'rs.request_id', "GROUP_CONCAT(s.description separator '; ')|description", 'GROUP_CONCAT(s.sample_code)|code', ); my @rels = ( 'request_specimen|rs', 'rs.specimen_id=s.id', 'specimens|s' ); my %where = ( 'rs.request_id' => { -in => $request_ids } ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, -group_by => 'rs.request_id', ); # p @args; # need to supply our own injection_guard to override built-in sql injection # attack prevention which detects ';' in GROUP_CONCAT, but we don't need one # here as we have no user input: my $sqla = SQL::Abstract::More->new( injection_guard => qr/^$/ ); my ($sql, @bind) = $sqla->select(@args); # p $sql; p \@bind; # $dbix->dump_query($sql, @bind); exit; return ($sql, @bind); } sub _reporters { my $request_ids = shift; my @cols = qw( rh.request_id rh.time|report_datetime u.username u.first_name u.last_name ur.registration_number ); my @rels = ( 'request_history|rh' => 'rh.user_id=u.id', 'users|u' => '=>ur.user_id=u.id', 'user_registration|ur' ); my %where = ( 'rh.request_id' => { -in => $request_ids }, 'rh.action' => 'reported', ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; # $dbix->dump_query($sql, @bind); exit; return ($sql, @bind); } sub _result_summaries { my $request_ids = shift; my @cols = qw( rs.request_id rs.results_summary ls.section_name ); my @rels = qw( request_result_summaries|rs rs.lab_section_id=ls.id lab_sections|ls ); my %where = ( 'rs.request_id' => { -in => $request_ids } ); my @args = ( -columns => \@cols, -from => [ -join => @rels ], -where => \%where, ); # p @args; my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; p \@bind; # $dbix->dump_query($sql, @bind); exit; return ($sql, @bind); } #------------------------------------------------------------------------------- =begin # not mapping samples to snomed # decode for biopsy_site info: my $anatomical_site_map = $dbix->select('anatomical_sites', [qw(site_name snomed)])->map; # p $anatomical_site_map; sub sample_to_snomed { my $vars = shift; my @snomed; # get info from biopsy_site if available, otherwise specimen code(s): if ( my $biopsy_site = $vars->{biopsy_site} ) { # optional my @sites = split '; ', $biopsy_site; # maybe multiple push @snomed, $anatomical_site_map->{$_} for @sites; } else { my @specimens = split ',', $vars->{specimen_code}; push @snomed, $specimen_topography{$_} for @specimens; } # p @snomed; # legacy biopsy site that doesn't match anatomical_sites.site_name: return { code => 'C80.9' } unless @snomed; # site unknown return scalar @snomed == 1 ? { code => $snomed[0] } : [ map { code => $_ }, @snomed ]; } # for non-histology specimens (sample_code NOT RLIKE '[DGLRX]([BS]L|F|U)'): # NB these are ICDO3 topography codes for site of tumour origin my %specimen_topography = ( BMA => 'C42.1', BMAT => 'C42.1', CF => '', CHIA => 'C42.0', # should never issue new diagnosis on CHIA CHIB => 'C42.0', # should never issue new diagnosis on CHIB CHIM => 'C42.0', # should never issue new diagnosis on CHIM CMP => '', # should never issue new diagnosis on CMP EF => '', # HS => '', # require biopsy_site now # LA => 'C77.9', # require biopsy_site now PB => 'C42.0', SE => '', # never issue new diagnosis on SE TBL => 'C42.1', TBP => 'C42.1', TSL => 'C42.1', URI => '', # never issue new diagnosis on URI # XA => '', # require biopsy_site now RXPB => 'C42.0', # should never issue new diagnosis on RXPB TSM => '', # never issue new diagnosis on TSM ); =cut