#!/usr/bin/perl
=begin -------------------------------------------------------------------------
Generates pipe-delimited txt file on Roche Gallium trial cases for Quintiles.
Record must be authorised to be included in data feed. Later requests on same
QLABS accession number (Ennnnnnn) supercede earlier ones
Quintiles require unique accession number / test code combination - tested for
at end
TODO: what does digit terminal mean?
=cut ---------------------------------------------------------------------------
use strict;
use warnings;
my $JUST_TESTING = 0; # dump data to file
use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;
use lib "$Bin/../../../lib";
use IO::All;
use Data::Dump qw(dump);
use LIMS::Local::Utils;
use DateTime::Format::MySQL;
use LIMS::Local::ScriptHelpers;
use Spreadsheet::WriteExcel::Simple;
my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);
################################################################################
my @recipients = qw(archana.ambily hmds.lth@nhs.net);
my $filename = sprintf 'BO21223_%s', $tools->time_now->ymd;
my $subject = $filename;
################################################################################
# get tools from LIMS::Local::ScriptHelpers:
my $sql_lib = $tools->sql_lib();
my $config = $tools->config();
my $dbix = $tools->dbix();
# Quintiles test codes => HMDS test_name map:
my %SDL = (
cd3 => 'SDL2110',
cd5 => 'SDL2099',
cd10 => 'SDL2100',
cd20 => 'SDL2101',
cd23 => 'SDL2102',
cd79 => 'SDL2105',
bcl1 => 'SDL2104', # = Cyclin D1
bcl2 => 'SDL2103',
bcl6 => 'SDL2108',
irf4 => 'SDL2106',
ki67 => 'SDL2109',
BCL2 => 'SDL2115', # FISH
BCL6 => 'SDL2115', # FISH
foxp1 => 'SDL2107',
BCL2_IgH => 'SDL2117', # FISH
);
my $results = do {
my $sql = $sql_lib->retr('gallium_trial_data_quintiles'); # warn $sql;
$dbix->query($sql)->hashes;
};
my %results; # AoH
ROW: # group data by accession number:
for my $row ( @$results ) { # warn dump $row;
# only want data where accession number in QLAB format (Ennnnnnn-n):
my ($accession_number) = $row->{accession_number} =~ /^(E\d+)\-\d/;
next ROW unless $accession_number; # warn $accession_number;
# collect as array of datasets for each accession number:
push @{ $results{$accession_number} }, $row;
}
my $trim = sub { LIMS::Local::Utils::trim(@_) };
# regex's in use in block:
my $words_re = qr{\s?(\b\w+\b( \b\w+\b)?)}; # matches 1 or 2 words
my $grade_re = qr{\b(grade\s\d\w?)\b}i; # lymphoma grade 1,2,3[ab]
# IgH-ASO result:
my $igh_aso_re = qr{\s?(not evaluable|(mono|oligo|poly)clonal)}i;
my $biopsy_re = qr/
biopsy\:\s*(.*?)\.
/ix; # capture all between ':' and 1st full-stop
my @rows;
# process sole - or most recent if >1 - dataset for each accession number:
for my $accession_number ( sort keys %results ) { # warn dump $accession_number;
my $data = $results{$accession_number};
# $data is either hashref or array(ref) of hashrefs:
my $vars = ref $data eq 'ARRAY'
? $data->[-1] # use most recent dataset (query ensures request.id order)
: $data; # warn dump $vars;
my $NULL = ""; # to avoid need for no warnings 'uninitialized'
my $dob;
if ( my $date = $vars->{dob} ) { # dd-MON-YYYY format required !!
$dob = DateTime::Format::MySQL->parse_date($date)->strftime('%d-%b-%Y');
$dob =~ s/[a-z]/[A-Z]/; # dd-mon-yyyy -> dd-MON-yyyy
}
my @common = ( # common fields for all rows:
'Leeds', # referring lab
$vars->{hmds_ref}, # HMDS number
$accession_number, # QLABS accession number (1st 8 chars only)
$vars->{patient_id} || $NULL, # subject ID
$dob || $NULL, # dd-MON-YYYY format required !!
'TISSUE FOR PATH REVIEW',
);
{ # SDL2087: [First Time Reviewer / Adjudicator]
my $result = $vars->{clinical_details} =~ /referred from Kiel/i
? 'Adjudicator' : 'First Time Reviewer';
push @rows, join '|', @common, ( 'SDL2087', $result, $NULL );
}
{ # SDL2088: [Anatomical localisation of biopsy]
if ( my ($site) = $vars->{comment} =~ /$biopsy_re/ ) {
push @rows, join '|', @common, ( 'SDL2088', &$trim($site), $NULL );
}
# SDL2093: [Pattern of involvement: interstitial, diffuse, etc]
# can't extract pattern of involvment from comment yet
}
{ # SDL2090: [Follicular lymphoma grade: 1, 2, 3a, 3b]
if ( my $icdo3 = $vars->{icdo3} ) { # extract grade - only if FL:
if ( $icdo3 =~ 9690 && $vars->{comment} =~ /$grade_re/ ) {
push @rows, join '|', @common, ( 'SDL2090', lc $1, $NULL ); # warn dump $1;
}
}
}
{ # SDL2089: [Sample quality: Good/Average/Poor]
my $result = $vars->{specimen_quality};
$result =~ s/adequate/good/; # only using adequate & poor
push @rows, join '|', @common, ( 'SDL2089', ucfirst $result, $NULL );
}
{ # SDL2091: Morphology comment [free text]
my $comment = $vars->{comment};
$comment =~ s/[\n\r]/ /g; # replace new-lines with spaces
push @rows, join '|', @common, ( 'SDL2091', $comment, $NULL );
}
{ # SDL2092: Lymphoma involvement [%]
push @rows, join '|', @common, ( 'SDL2092', 100, $NULL ); # temp assessment
}
{ # SDL2099 - SDL2110 marker results [+, -, +/-, Not Evaluable]
TEST:
while ( my ($marker, $test_code) = each %SDL ) {
my $result = $vars->{$marker} || next TEST;
# TODO: might need to examine specimen type for suitability -> comment:
my $comment = $vars->{$marker} ? $NULL : 'Test not performed';
push @rows, join '|', @common, ( $test_code, $result, $comment );
}
}
if ( my $result = $vars->{molecular_summary} ) {
# SDL2111: IgH-ASO-PCR [Done / Not Done]
# SDL2112: IgH-ASO-PCR Result [not evaluable, monoclonal, oligoclonal, polyclonal]
# SDL2113: IgH-ASO-PCR Base-pair length [only if monoclonal; numeric]
# SDL2114: IgH-ASO-PCR Comment [free text]
if ( $result =~ /IgH-ASO assay\:$words_re/ ) { # warn $1;
push @rows, join '|', @common, ( 'SDL2111', &$trim($1), $NULL );
}
if ( $result =~ /IgH-ASO result\:$igh_aso_re/ ) { # warn $1;
push @rows, join '|', @common, ( 'SDL2112', lc &$trim($1), $NULL );
}
if ( $result =~ /IgH-ASO bp length\:\s?(\d+)/ ) { # warn $1;
push @rows, join '|', @common, ( 'SDL2113', $1, $NULL );
}
if ( $result =~ /IgH-ASO comment\:(.*)/ ) { # warn $1;
push @rows, join '|', @common, ( 'SDL2114', &$trim($1), $NULL );
}
}
# SDL2115: BCL2 break-apart [Positive, Negative, Not Evaluable]
if ( my $val = $vars->{BCL2} ) {
my $result = $val =~ /rearranged/
? 'Positive'
: $val eq 'failed'
? 'Not Evaluable' : 'Negative';
push @rows, join '|', @common, ( 'SDL2115', $result, $NULL );
}
# SDL2116: BCL6 break-apart [Positive, Negative, Not Evaluable]
if ( my $val = $vars->{BCL6} ) {
my $result = $val =~ /rearranged/
? 'Positive'
: $val eq 'failed'
? 'Not Evaluable' : 'Negative';
push @rows, join '|', @common, ( 'SDL2116', $result, $NULL );
}
# SDL2117: t(14;18) fusion assay [Positive, Negative, Not Evaluable]
if ( my $val = $vars->{BCL2_IgH} ) {
my $result = $val =~ /translocation/
? 'Positive'
: $val eq 'failed'
? 'Not Evaluable' : 'Negative';
push @rows, join '|', @common, ( 'SDL2117', $result, $NULL );
}
{
# SDL2123: Kappa light chain [+, -, +/-, Not Evaluable]
# SDL2134: Lambda light chain [+, -, +/-, Not Evaluable]
1;
}
{ # SDL2118: FISH comment [free text]
if ( my $result = $vars->{fish_summary} ) {
push @rows, join '|', @common, ( 'SDL2118', $result, $NULL );
}
}
{ # SDL2094: Final diagnosis
push @rows, join '|', @common, ( 'SDL2094', $vars->{diagnosis}, $NULL );
}
{ # SDL2095: Discrepancy with local diagnosis [Yes, No]
# SDL2096: Urgent report required [Yes, No]
# SDL2097: Urgent block return required [Yes, No]
1;
}
{ # SDL2098: Insufficient material, retest required [Yes, No]
my $result = $vars->{diagnosis} eq 'Inadequate sample' ? 'Yes' : 'No';
push @rows, join '|', @common, ( 'SDL2098', $result, $NULL );
}
{ # SDL2125: ICDO3 code
if ( my $icdo3 = $vars->{icdo3} ) {
push @rows, join '|', @common, ( 'SDL2125', $icdo3, $NULL );
}
}
}
exit if ! @rows;
{ # check accession number / test code is unique:
my %h;
for (@rows) {
my @data = split '\|'; # warn dump @data;
die "duplicate $data[2] / $data[6] combination"
if $h{$data[2]}{$data[6]}++; # [2] = accession number; [6] = test code
}
}
{ # xl spreadsheet (do this before .dat)
my $xl = Spreadsheet::WriteExcel::Simple->new;
for (@rows) {
my @row = split '\|';
$xl->write_row(\@row);
}
# save to file if just testing:
$xl->save("$Bin/$filename.xls") if $JUST_TESTING;
my %mail = (
attachment => $xl->data,
filename => "$filename.xls", # BO21223_yyyymmdd
subject => $subject,
config => $config,
);
$tools->send_mail(\%mail, \@recipients);
}
=begin
{ # pipe-delimited file:
my $data = join "\n", @rows;
$filename .= '.dat'; # can append now
$data > io($Bin.'/'.$filename) if $JUST_TESTING;
my %mail = (
attachment => $data,
filename => $filename, # BO21223_yyyymmdd.dat
subject => $subject,
config => $config,
);
$tools->send_mail(\%mail, \@recipients);
}
=cut
# print duplicate accession number data to file:
my @ary = map $results{$_}, grep { scalar @{ $results{$_} } > 1 } keys %results;
( dump @ary ) > io($Bin.'/quintiles_duplicates.txt') if $JUST_TESTING;
=begin fields:
SDL2086: HMDS
SDL2087: [First Time Reviewer / Adjudicator]
SDL2088: [Anatomical localisation of biopsy: see list]
SDL2089: [Sample quality: Good/Average/Poor]
SDL2090: [Follicular lymphoma grade: 1, 2, 3a, 3b]
SDL2091: Morphology comment [free text]
SDL2092: Lymphoma involvement [%]
SDL2093: Pattern of involvement [interstitial, diffuse, nodular, intrasinusoidal]
SDL2099 - SDL2110 - test results [+, -, +/-, Not Evaluable]
SDL2111: IgH-ASO-PCR [Done / Not Done]
SDL2112: IgH-ASO-PCR Result [not evaluable, monoclonal, oligoclonal, polyclonal]
SDL2113: IgH-ASO-PCR Base-pair length [only if monoclonal; numeric]
SDL2114: IgH-ASO-PCR Comment [free text]
SDL2115: BCL2 break-apart [Positive, Negative, Not Evaluable]
SDL2116: BCL6 break-apart [Positive, Negative, Not Evaluable]
SDL2117: t(14;18) fusion assay [Positive, Negative, Not Evaluable]
SDL2118: FISH comment [free text]
SDL2094: Final diagnosis
SDL2095: Discrepancy with local diagnosis [Yes, No]
SDL2096: Urgent report required [Yes, No]
SDL2097: Urgent block return required [Yes, No]
SDL2098: Insufficient material, retest required [Yes, No]
SDL2125: ICDO3 code
SDL2123: Kappa light chain [+, -, +/-, Not Evaluable]
SDL2134: Lambda light chain [+, -, +/-, Not Evaluable]
9-field data file:
1: HMDS
2: HMDS_ref [optional]
3: accession number ie pc.unit_number
4: Patient ID [optional]
5: DoB [optional]
6: Visit description [optional]
7: QLAB test code ie SDLxxxx
8: Test result [or CANCELLED + comment in next field]
9: Comment [only for cancelled results]
=cut