#!/usr/bin/perl

=begin -------------------------------------------------------------------------
Generates pipe-delimited txt file on Roche Gallium trial cases for Quintiles.
Record must be authorised to be included in data feed. Later requests on same
QLABS accession number (Ennnnnnn) supercede earlier ones
Quintiles require unique accession number / test code combination - tested for
at end

TODO: what does digit terminal mean?
=cut ---------------------------------------------------------------------------

use strict;
use warnings;

my $JUST_TESTING = 1; # dump data to file

use lib '/home/raj/perl5/lib/perl5';
use FindBin qw($Bin); # warn $Bin;
use lib "$Bin/../../../lib";

use IO::All;
use Data::Dump qw(dump);
use LIMS::Local::ScriptHelpers;

my $tools = LIMS::Local::ScriptHelpers->new();
$tools->test_only($JUST_TESTING);

################################################################################
# my $duration = 7; # over past number of days - retrieving all data
my @recipients = qw(hmds.lth@nhs.net); # send to departmental addr for manual entries
my $filename = sprintf 'BO21223_%s.dat', $tools->time_now->ymd;
my $subject  = $filename;
################################################################################

# get tools from LIMS::Local::ScriptHelpers:
my $sql_lib = $tools->sql_lib();
my $config  = $tools->config(); 
my $dbix    = $tools->dbix();

# Quintiles test codes => HMDS test_name map:
my %SDL = (
	cd3 	=> 'SDL2110',
	cd5 	=> 'SDL2099',
	cd10	=> 'SDL2100',
	cd20	=> 'SDL2101',
	cd23	=> 'SDL2102',
	bcl1	=> 'SDL2104', # = Cyclin D1
	bcl2	=> 'SDL2103',
	bcl6 	=> 'SDL2108',
	irf4	=> 'SDL2106',
	ki67 	=> 'SDL2109',
	fox_p1	=> 'SDL2107',
    'bcl-2' => 'SDL2115', # FISH
    'bcl-6' => 'SDL2116', # FISH    
);

my $results = do {
    my $sql = $sql_lib->retr('gallium_trial_data_quintiles'); # warn $sql;
    $dbix->query($sql)->hashes;
};

my %results; # AoH

ROW: # group data by accession number:
for my $row ( @$results ) { # warn dump $row;
	# only want data where accession number in QLAB format (Ennnnnnn-n):
    my ($accession_number) = $row->{accession_number} =~ /^(E\d+)\-\d/; 
    next ROW unless $accession_number; # warn $accession_number;
    # collect as array of datasets for each accession number:
    push @{ $results{$accession_number} }, $row;
}

my @rows;

# process sole - or most recent if >1 - dataset for each accession number:
for my $accession_number ( sort keys %results ) { # warn dump $accession_number;
    my $data = $results{$accession_number};
    
    # $data is either hashref or array(ref) of hashrefs:
    my $vars = ref $data eq 'ARRAY'
        ? $data->[-1] # use most recent dataset (query ensures request.id order)
        : $data; # warn dump $vars; 

	my $NULL = ""; # to avoid need for no warnings 'uninitialized'
	
	my @common = ( # common fields for all rows:
		'Leeds', # referring lab
		$vars->{hmds_ref}, # HMDS number
		$accession_number, # QLABS accession number (1st 8 chars only)
		$vars->{patient_id} || $NULL, # subject ID
		$NULL, # $vars->{dob} || $NULL, # dd-MON-YYYY format required !!
		'TISSUE FOR PATH REVIEW',
	);
	
	{ # SDL2087: [First Time Reviewer / Adjudicator]
		my $result = $vars->{clinical_details} =~ /referred from Kiel/i
			? 'Adjudicator' : 'First Time Reviewer';
		push @rows, join '|', @common, ( 'SDL2087', $result, $NULL );
	}
	{   # SDL2088: [Anatomical localisation of biopsy]
	    # SDL2093: [Pattern of involvement: interstitial, diffuse, etc]
	    # can't extract anatomical site or pattern of involvment from comment yet:
	    push @rows, join '|', @common, ( 'SDL2088', 'CANCELLED', 'see morphology comment' );
	    push @rows, join '|', @common, ( 'SDL2093', 'CANCELLED', 'see morphology comment' );
    }
    { # SDL2090: [Follicular lymphoma grade: 1, 2, 3a, 3b]
	    # extract FL grade (1|I|2|II|3|III) - only if FL:
        if ( $vars->{diagnosis} =~ /^Follicular lymphoma/ ) { # can't use ICDO3 (DLBCL with FL)
            # use case-sensitive regex to capture "grade I" but not "grade is":
            my ($grade) = $vars->{comment} =~ /\b([Gg]rade\s[\d|I+]\w?)\b/; 
            if ($grade) { # warn Dumper $grade;
                if ( my $n = $grade =~ tr/I// ) { # convert Roman to Arabic digits: 
                    $grade =~ s/(I+)/$n/; # $n = count of I's
                } # warn Dumper $grade;
                push @rows, join '|', @common, ( 'SDL2090', lc $grade, $NULL );
            }
        }
	}
	{ # SDL2089: [Sample quality: Good/Average/Poor]
		my $result = $vars->{specimen_quality};
		$result =~ s/adequate/good/; # only using adequate & poor
		push @rows, join '|', @common, ( 'SDL2089', ucfirst $result, $NULL );
	}
	{ # SDL2091: Morphology comment [free text]
        my $comment = $vars->{comment};
        $comment =~ s/[\n\r]/ /g; # replace new-lines with spaces
		push @rows, join '|', @common, ( 'SDL2091', $comment, $NULL );
	}
	{ # SDL2092: Lymphoma involvement [%]
		push @rows, join '|', @common, ( 'SDL2092', 100, $NULL ); # temp assessment
	}
	{ #	SDL2099 - SDL2110 marker results [+, -, +/-, Not Evaluable]
		TEST:
        while ( my ($marker, $test_code) = each %SDL ) {
			my $result  = $vars->{$marker} || next TEST;
			# TODO: might need to examine specimen type for suitability -> comment:
			my $comment = $vars->{$marker} ? $NULL : 'Test not performed';
			push @rows, join '|', @common, ( $test_code, $result, $comment );
		}
		# CD79a / SDL2105 not done:
		# push @rows, join '|', @common, ( 'SDL2105', 'CANCELLED', 'Test not performed' );
	}
	{ # SDL2111: IgH-ASO-PCR [Done / Not Done]
	  #	SDL2112: IgH-ASO-PCR Result [not evaluable, monoclonal, oligoclonal, polyclonal]
	  # SDL2113: IgH-ASO-PCR Base-pair length [only if monoclonal; numeric]
      # SDL2114: IgH-ASO-PCR Comment [free text]
	  # SDL2115: BCL2 break-apart [Positive, Negative, Not Evaluable]
	  # SDL2116: BCL6 break-apart [Positive, Negative, Not Evaluable]
	  # SDL2117: t(14;18) fusion assay [Positive, Negative, Not Evaluable]
	  # SDL2123: Kappa light chain [+, -, +/-, Not Evaluable]
	  # SDL2134: Lambda light chain [+, -, +/-, Not Evaluable]
      1;
	}
	{ # SDL2118: FISH comment [free text]
        if ( my $result = $vars->{fish_summary} ) {
            push @rows, join '|', @common, ( 'SDL2118', $result, $NULL );
        }
	}
	{ # SDL2094: Final diagnosis
		push @rows, join '|', @common, ( 'SDL2094', $vars->{diagnosis}, $NULL );
	}
	{ # SDL2095: Discrepancy with local diagnosis [Yes, No]
	  # SDL2096: Urgent report required [Yes, No]
	  # SDL2097: Urgent block return required [Yes, No]
        1;
	}
	{ # SDL2098: Insufficient material, retest required [Yes, No]
		my $result = $vars->{diagnosis} eq 'Inadequate sample' ? 'Yes' : 'No';
		push @rows, join '|', @common, ( 'SDL2098', $result, $NULL );
	}
	{ # SDL2125: ICDO3 code
        if ( my $icdo3 = $vars->{icdo3} ) {
            push @rows, join '|', @common, ( 'SDL2125', $icdo3, $NULL );
        }
	}
}

exit if ! @rows;

{ # check accession number / test code is unique:
	my %h;
	for (@rows) {
		my @data = split '\|'; # warn dump @data;
		die "duplicate $data[2] / $data[6] combination"
			if $h{$data[2]}{$data[6]}++; # [2] = accession number; [6] = test code
	}
}

{ # pipe-delimited file:
    my $data = join "\n", @rows;

	$data > io($Bin.'/'.$filename) if $JUST_TESTING;

    my %mail = (
        attachment => $data,
        filename   => $filename, # BO21223_yyyymmdd.dat
        subject    => $subject,
        config     => $config,
    );
#    $tools->send_mail(\%mail, \@recipients);
}

# print duplicate accession number data to file:
my @ary = map $results{$_}, grep { scalar @{ $results{$_} } > 1 } keys %results; 
( dump @ary ) > io($Bin.'/quintiles_duplicates.txt') if $JUST_TESTING;

=begin fields:
SDL2086: HMDS
SDL2087: [First Time Reviewer / Adjudicator]
SDL2088: [Anatomical localisation of biopsy: see list]
SDL2089: [Sample quality: Good/Average/Poor]
SDL2090: [Follicular lymphoma grade: 1, 2, 3a, 3b]
SDL2091: Morphology comment [free text]
SDL2092: Lymphoma involvement [%]
SDL2093: Pattern of involvement [interstitial, diffuse, nodular, intrasinusoidal]
SDL2099 - SDL2110 - test results [+, -, +/-, Not Evaluable]
SDL2111: IgH-ASO-PCR [Done / Not Done]
SDL2112: IgH-ASO-PCR Result [not evaluable, monoclonal, oligoclonal, polyclonal]
SDL2113: IgH-ASO-PCR Base-pair length [only if monoclonal; numeric]
SDL2114: IgH-ASO-PCR Comment [free text]
SDL2115: BCL2 break-apart [Positive, Negative, Not Evaluable]
SDL2116: BCL6 break-apart [Positive, Negative, Not Evaluable]
SDL2117: t(14;18) fusion assay [Positive, Negative, Not Evaluable]
SDL2118: FISH comment [free text]
SDL2094: Final diagnosis
SDL2095: Discrepancy with local diagnosis [Yes, No]
SDL2096: Urgent report required [Yes, No]
SDL2097: Urgent block return required [Yes, No]
SDL2098: Insufficient material, retest required [Yes, No]
SDL2125: ICDO3 code
SDL2123: Kappa light chain [+, -, +/-, Not Evaluable]
SDL2134: Lambda light chain [+, -, +/-, Not Evaluable]

9-field data file:
1: HMDS
2: HMDS_ref [optional]
3: accession number ie pc.unit_number
4: Patient ID [optional]
5: DoB [optional]
6: Visit description [optional]
7: QLAB test code ie SDLxxxx
8: Test result [or CANCELLED + comment in next field]
9: Comment [only for cancelled results]
=cut
