#!/usr/bin/env perl

=begin -------------------------------------------------------------------------
Generates spreadsheet on Gallium trial cases. Authorised cases only. Requires
ICDO3 or one of following diagnoses: 'no evidence of lymphoma', 'unsuitable
specimen', 'inadequate sample'. Anything else picked up by gallium_pending.pl
=cut ---------------------------------------------------------------------------

use Getopt::Std;
getopts('etq'); # email (suppress), testing, sql-trace
our($opt_e,$opt_t,$opt_q);

use strict;
use warnings;

use lib '/home/raj/perl5/lib/perl5';
use Data::Printer;
use SQL::Abstract::More;
use DateTime::Format::MySQL;
use Spreadsheet::WriteExcel::Simple;

use FindBin qw($Bin); # warn $Bin;
use lib "$Bin/../../../lib";
use LIMS::Local::Utils;
use LIMS::Local::GalliumData;
use LIMS::Local::ScriptHelpers;

my $tools = LIMS::Local::ScriptHelpers->new();
my $today = $tools->time_now->ymd(''); # yyyymmdd

################################################################################
my $SUPPRESS_EMAIL = $opt_e;
my $JUST_TESTING   = $opt_t || 0; # save locally, email to raj only (if ! $SUPPRESS_EMAIL)

my $subject  = sprintf 'BO21223_%s', $today;
my $filename = $subject . '.xls'; # warn $filename;
my @recipients = qw(
	hmds.lth@nhs.net
    qlabs.data_imports
);

use constant DATE_FORMAT => '%d-%b-%Y'; # required format
################################################################################
$tools->test_only($JUST_TESTING);

# xl file headers:
my $col_headers = _col_headers(); # p $col_headers; exit;

push @recipients, 'raj' if $JUST_TESTING; # will be sole recipient

# get tools from LIMS::Local::ScriptHelpers:
my $sql_lib = $tools->sql_lib();
my $config  = $tools->config();
my $dbix    = $tools->dbix();

my $cfg_file = "$Bin/../lib/settings.cfg";
my $settings = Config::Auto::parse($cfg_file); # warn Dumper $settings; exit;

# permitted non-ICDO3 diagnoses:
my $non_icdo3 = $settings->{non_icdo3}; # warn Dumper $non_icdo3;

# ensure case-sensitivity of col names preserved (eg bcl2 & BCL2):
$dbix->lc_columns = 0;

my $nearest = sub { LIMS::Local::Utils::nearest_value(@_) };
my $round   = sub { LIMS::Local::Utils::round_value(@_) };

{ # exit unless any requests updated this month:
    my ($query, @bind) = _get_updated_requests_count();
    $dbix->query($query, @bind)->list || exit;
}

my $requests = do {
    my $sql = $sql_lib->retr('gallium_data'); # warn $sql;
        $dbix->dump_query($sql, @$non_icdo3) if $opt_q;
    $dbix->query($sql, @$non_icdo3); # permitted non-ICDO3 diagnoses
};

my @data = my @errors = ();
ROW: while ( my $vars = $requests->hash ) { # warn Dumper $vars; next;
	my $hmds_ref =  $vars->{hmds_ref};

    # need to check "optional" comment field exists or will get fatal error:
    unless ($vars->{comment}) {
        push @errors, "empty comment field $vars->{hmds_ref}";
        next ROW;
    }

	my $gallium = LIMS::Local::GalliumData->new(data => $vars);

    my @row = (); # reset

	# 1) Lab ID (always 21223LEEDS)
	push @row, '21223LEEDS';

    # 2) QLABS accession number:
    push @row, $vars->{accession_number};

    # 3) lab origin (always Leeds):
    push @row, 'LEEDS';

    # 4) registration date (DD-MON-YYYY):
    push @row, _date_transform($vars->{registered});

    # 5) Leeds lab number:
    push @row, $vars->{hmds_ref};

    # 6) Kiel lab number:
    push @row, 'NOT APPLICABLE';

    { # 7) Biopsy type:
        my $result = $gallium->get_biopsy_type || 'OTHER';
        push @row, uc $result; # warn $result;
    }

    { # 8, 9, 10) slides & blocks:
        my $h = $gallium->get_external_reference; # warn Dumper $h;
        { # either, both or neither:
            if ( $h->{slides} && $h->{blocks} ) {
                push @row, 'BOTH';
            }
            elsif ($h->{slides}) {
                push @row, 'SLIDES';
            }
            elsif ($h->{blocks}) {
                push @row, 'BLOCKS';
            }
            else {
                push @row, 'NONE';
            }
        }
        push @row, $h->{slides} || 0; # number of slides
        push @row, $h->{blocks} || 0; # number of blocks
    }

    # 11) specimen quality:
    push @row, uc $vars->{specimen_quality};

    # 12) date of diagnosis (= authorised; DD-MON-YYYY):
    push @row, _date_transform($vars->{authorised});

    { # 13, 14, 15) diagnosis, lymphoma grade, reason for no diagnosis:
        my $diagnosis = $gallium->get_diagnosis;
        my $grade     = $gallium->get_lymphoma_grade; # returns 0 if not FL
        my $icdo3     = $gallium->get_icdo3; # returns 0 if not ICDO3

        { # QLABS won't accept our terms for B-CLL or CD5-neg B-LPD:
            $diagnosis = 'CHRONIC LYMPHOCYTIC LEUKEMIA B-CELL TYPE'
                if $diagnosis =~ /chronic lymphocytic leukaemia/;
            $diagnosis =~ s/B-cell (LPD) NOS/lymphoproliferative disease/;
        }
        # diagnosis (override entry if no ICDO3):
        push @row, $icdo3
            ? ( join ' - ', $icdo3, uc $diagnosis )
            : 'NO DIAGNOSIS MADE';
        { # lymphoma grade - only applicable to FL:
            if ($grade) {
                my %h = (
                    '1'   => '9695/3',
                    '2'   => '9691/3',
                    '3a'  => '9698/3',
                    '3b'  => '9698/3',
                );
                push @row, $h{$grade}
                    ? sprintf '%s - FL GRADE %s', $h{$grade}, uc $grade
                    : 'UNGRADED';
            }
            elsif ( $icdo3 =~ /^9690/ ) { # FL without defined $grade
                push @row, 'UNGRADED';
            }
            else { push @row, 'NOT APPLICABLE' }
        }
        { # reason for no diagnosis - only applicable to non-ICDO3 diagnoses:
            push @row, $icdo3
                ? 'NOT APPLICABLE' # don't need reason
                : uc $diagnosis; # will be one of permitted entries in $non_icdo3
        }
    }

    { # 16) eligible for trial:
        my $result = $gallium->get_trial_elegibility;
		unless ( grep lc $result eq $_, qw/yes no/ ) { # warn Dumper $result;
			push @errors, qq!trial eligibility "$result" invalid for $hmds_ref!;
			next ROW;
		}
        push @row, uc $result; # default handled by get_trial_elegibility()
    }

    { # 17) discrepancy with local diagnosis
        my $result = $gallium->is_discrepant_diagnosis || 'no'; # returns false if not
		unless ( grep lc $result eq $_, qw/yes no/ ) { # warn Dumper $result;
			push @errors, qq!discrepant diagnosis "$result" invalid for $hmds_ref!;
			next ROW;
		}
        push @row, uc $result; # default NO
    }

    { # 18) adjudicated # query now exclusively Quintiles src so will always be NO
        my $result = $gallium->get_lab_role; # warn Dumper $result;
		push @row, lc $result eq 'adjudicator' ? 'YES' : 'NO';
	}

    { # 19) discrepant localisation
        my $result = $gallium->is_discrepant_localisation || 'no';
		unless ( grep lc $result eq $_, qw/yes no/ ) { # warn Dumper $result;
			push @errors, qq!discrepant localisation "$result" invalid for $hmds_ref!;
			next ROW;
		}
        push @row, uc $result; # default NO
    }

    { # 20) lymphoma involvement (%)
        my $result = $gallium->get_lymphoma_involvement; # warn $result;
        push @row, defined $result
            ? &$nearest(10, $result) # rounds to nearest 10
            : 'NOT APPLICABLE'; # could be zero
    }

    { # 21 .. 34 # IHC
        my @markers = qw( cd3 cd5 cd10 cd20 cd23 cd79 ki67 cyclin_d1 bcl2 bcl6
            foxp1 irf4 kappa lambda );

        my $results = $gallium->get_test_results(\@markers); # hashref

        for my $t(@markers) {
            my $result = $results->{$t};
            push @row, defined $result ? $result : 'NOT DONE'; # could be zero
        }
    }

    { # 35) t(14;18) fusion
        my $result = $gallium->get_fish_fusion_assay_result('BCL2_IgH');
        push @row, uc $result || 'NOT DONE';
    }

    { # 36, 37, 38) BCL2, BCL6 & IgH break-apart
        for ( qw/BCL2 BCL6 IgH/ ) {
            my $result = $gallium->get_fish_break_apart_result($_);
            push @row, uc $result || 'NOT DONE';
        }
	}

    { # 39, 40 BioMed2 IgH Clonality & BP length
        my $result = $gallium->get_igh_pcr; # warn Dumper $result; # hashref

        my $igh_status = $result->{igh_pcr_assay} || 'NOT DONE';

        # if IgH clonality done:
        if ( lc $igh_status eq 'done' ) {
            push @row, uc $result->{igh_pcr_result} || 'NOT DONE';
            push @row, $result->{igh_pcr_bp_length} || 'NOT DONE';
        }
        else {
            push @row, ('NOT DONE', 'NOT DONE');
        }
    }

	{ # 41 .. 46 RNA results:
        my $result = $gallium->get_rna_results; # warn Dumper $result; # hashref
        # extraction done:
        push @row, %$result ? 'YES' : 'NO';
        # rna purity (should be to 2dp):
        push @row, $result->{rna_purity} ?
            sprintf '%.2f', $result->{rna_purity} : 'NOT APPLICABLE';
        # rna concentration (integer):
        push @row, defined $result->{rna_concentration} ? # probably can't be 0
            &$round($result->{rna_concentration}) : 'NOT APPLICABLE';
        { # rna aliquots & volume per aliquot:
            no warnings 'uninitialized'; # maybe no $result->{rna_volume}
            my ($number, $volume) = split ' x ', $result->{rna_volume}; # eg 2 x 15;
            push @row, $number || 'NOT APPLICABLE';
            push @row, $volume || 'NOT APPLICABLE';
        }
        { # number of rna sections:
            no warnings 'uninitialized'; # maybe no $result->{rna_sections}
            my ($number, $t) = split ' x ', $result->{rna_sections}; # eg 10 x 5;
            push @row, $number || 'NOT APPLICABLE'; # don't need thickness ($t)
        }
	}

	{ # 47 .. 52 DNA results:
        my $result = $gallium->get_dna_results; # warn Dumper $result; # hashref
        # extraction done:
        push @row, %$result ? 'YES' : 'NO';
        # dna purity (should be to 2dp):
        push @row, $result->{dna_purity} ?
            sprintf '%.2f', $result->{dna_purity} : 'NOT APPLICABLE';
        # dna concentration (integer):
        push @row, defined $result->{dna_concentration} ? # probably can't be 0
            &$round($result->{dna_concentration}) : 'NOT APPLICABLE';
        { # dna aliquots & volume per aliquot:
            no warnings 'uninitialized'; # maybe no $result->{dna_volume}
            my ($number, $volume) = split ' x ', $result->{dna_volume}; # eg 2 x 15;
            push @row, $number || 'NOT APPLICABLE';
            push @row, $volume || 'NOT APPLICABLE';
        }
        { # number of dna sections:
            no warnings 'uninitialized'; # maybe no $result->{dna_sections}
            my ($number, $t) = split ' x ', $result->{dna_sections}; # eg 10 x 5;
            push @row, $number || 'NOT APPLICABLE'; # don't need thickness ($t)
        }
	}

    { # 53 .. 57 TMA
        my $result = $gallium->get_tma_results; # warn Dumper $result; # hashref

        # was TMA taken [YES, or reason if not]:
        my $tma_status = $result->{tma_status};

        # if TMA taken:
        if ( $tma_status && uc $tma_status eq 'YES' ) {
            my $tma_position = $result->{tma_position};
            my $tma_block    = $result->{tma_block};

            if ( $tma_block && $tma_position ) {
                push @row, 'YES'; # TMA taken
                push @row, 'NOT APPLICABLE'; # reason
                push @row, $result->{tma_cores} || 0; # no. of TMA cores [1/2]
                push @row, 'GALLIUM-Leeds-' . $tma_block; # name of TMA
                push @row, $tma_position; # position on TMA
            }
            else { # pending manual edit (probably regex failed on core positions)
                push @errors, "no TMA block/position for $vars->{hmds_ref}";
                push @row, ('PENDING','PENDING');
                push @row, 'NOT APPLICABLE' for (1..3);
            }
        }
        else { # $result->{tma_taken} != YES, or no $result:
            push @row, $tma_status ? 'NO' : 'PENDING'; # ie tma_status != YES
            push @row, $tma_status ? ( uc $tma_status ) : 'PENDING'; # reason
            push @row, 'NOT APPLICABLE' for (1..3); # no of cores, name & position
        }
    }
    { # 58, 59, 60 sections (which ones, DNA, RNA or what else?)
        push @row, 'NO'; # sections taken?
        push @row, 'NOT APPLICABLE'; # reason not taken [or INSUFFICIENT MATERIAL]
        push @row, 0; # no. of sections [or NOT APPLICABLE]
    }

    push @data, \@row; # warn Dumper \@row;
}

if (@errors) {
    my $msg = "Gallium trial request errors:\n\t" . join "\n\t", @errors;
	$JUST_TESTING ? warn $msg : $tools->mail_admin({ script => $0, msg => $msg });
}

exit if ! @data;

{ # construct excel spreadsheet:
    my $xl = Spreadsheet::WriteExcel::Simple->new;
	$xl->write_bold_row($col_headers); # arrayref
	$xl->write_row($_) for @data;
    # save to file if just testing:
    $xl->save($Bin.'/'.$filename) if $JUST_TESTING;

    _send_mail($xl->data) unless $SUPPRESS_EMAIL;
}

sub _get_updated_requests_count {
    my @rels = (
        'requests|r'                =>  'r.patient_case_id=pc.id'       ,
        'patient_case|pc'           =>  'pc.referral_source_id=rs.id'   ,
        'referral_sources|rs'       =>  'rt.request_id=r.id'            ,
        'request_trial|rt'          =>  'rt.trial_id=ct.id'             ,
        'clinical_trials|ct'        =>  'rrd.request_id=r.id'           ,
        'request_report_detail|rrd' =>  'rrd.diagnosis_id=d.id'         ,
        'diagnoses|d'
	);
    my $ref_date = $tools->date_subtract(months => 1);
    my @args = (
		-columns  => 'COUNT(*)',
		-from     => [ -join => @rels ],
		-where    => {
            'ct.trial_name'   => 'Roche Gallium',
            'rs.display_name' => { -like => 'Quintiles%' },
            -or => {
                'd.icdo3' => { '!=' => undef     },
                'd.name'  => { -in => $non_icdo3 },
            },
            'DATE(rrd.updated_at)' => { '>=' => $ref_date->ymd },
        },
	); # p @args;
	my ($sql, @bind) = SQL::Abstract::More->new->select(@args); # p $sql; # p \@bind;
        $dbix->dump_query($sql, @bind) if $opt_q;
    return ($sql, @bind);
}

sub _date_transform {
    my $date = DateTime::Format::MySQL->parse_date(@_)->strftime(DATE_FORMAT);
    $date =~ tr/[a-z]/[A-Z]/; # warn $date; # dd-mon-yyyy -> dd-MON-yyyy
    return $date;
}

sub _send_mail {
    my $data = shift;

    my %mail = (
        attachment  => $data,
        filename    => $filename,
        subject     => $subject,
        message     => 'FAO Pradeep Vardhaman <Vardhaman.Pradeep@quintiles.com>',
        config      => $config,
    ); # warn Dumper \%mail;
    $tools->send_mail(\%mail, \@recipients)
}

sub _col_headers {
	my @cols = (
		[ NULL	  => 'lab_id'					], # don't need col header
		[ NULL	  => 'accession_number'			], # don't need col header
		[ SDL2086 => 'lab_origin'				],
		[ SDL2605 => 'registered'				],
		[ SDL2606 => 'hmds_ref'					],
		[ SDL2607 => 'kiel_ref'					],
		[ SDL2608 => 'biopsy_type'				],
		[ SDL2609 => 'slides_or_blocks'			],
		[ SDL2610 => 'slides'					],
		[ SDL2611 => 'blocks'					],
		[ SDL2089 => 'specimen_quality'			],
		[ SDL2612 => 'reported'					],
		[ SDL2094 => 'diagnosis'				],
		[ SDL2090 => 'grade'					],
		[ SDL2703 => 'no_diagnosis_reason'		],
		[ SDL2613 => 'elegible'					],
		[ SDL2095 => 'discrepant_diagnosis'		],
		[ SDL2614 => 'adjudicated'				],
		[ SDL2615 => 'discrepant_localisation'	],
		[ SDL2092 => 'lymphoma_involvement'		],
		[ SDL2110 => 'cd3'						],
		[ SDL2099 => 'cd5'						],
		[ SDL2100 => 'cd10'						],
		[ SDL2101 => 'cd20'						],
		[ SDL2102 => 'cd23'						],
		[ SDL2105 => 'cd79'						],
		[ SDL2109 => 'ki67'						],
		[ SDL2104 => 'cyclin_d1'				],
		[ SDL2103 => 'bcl2'						],
		[ SDL2108 => 'bcl6'						],
		[ SDL2107 => 'foxp1'					],
		[ SDL2106 => 'irf4'						],
		[ SDL2123 => 'kappa'					],
		[ SDL2134 => 'lambda'					],
		[ SDL2117 => 't(14;18)_fusion'			],
		[ SDL2115 => 'BCL2_breakapart'			],
		[ SDL2116 => 'BCL6_breakapart'			],
		[ SDL2638 => 'IgH_breakapart'			],
		[ SDL2639 => 'IgH_clonality'			],
		[ SDL2113 => 'IgH_BP_length'			],
		[ SDL2616 => 'rna_extraction'			],
		[ SDL2617 => 'rna_purity'				],
		[ SDL2618 => 'rna_concentration'		],
		[ SFL2619 => 'rna_aliquots'				],
		[ SDL2620 => 'rna_volume'				],
		[ SDL2621 => 'rna_sections'				],
		[ SDL2623 => 'dna_extraction'			],
		[ SDL2624 => 'dna_purity'				],
		[ SDL2625 => 'dna_concentration'		],
		[ SDL2626 => 'dna_aliquots'				],
		[ SDL2627 => 'dna_volume'				],
		[ SDL2628 => 'dna_sections'				],
		[ SDL2629 => 'tma_taken'				],
		[ SDL2630 => 'tma_reason'				],
		[ SDL2631 => 'tma_cores'				],
		[ SDL2632 => 'tma_name'					],
		[ SDL2633 => 'tma_position'				],
		[ SDL2634 => 'sections_taken'			],
		[ SDL2635 => 'reason'					],
		[ SDL2636 => 'sections'					],
	);

	my @headers = map $_->[0], @cols; # warn Dumper \@headers;
	# convert NULL's to empty string:
	map { s/NULL//; } @headers; # warn Dumper \@headers;
	return \@headers;
}

__DATA__
Qlabs Accession number
Central Pathology Lab [LEEDS]
Registered Date at Central Pathology [DDMMMYYYY]
Accession no in central pathology-Leeds: 5 digit number/2 digit year
Accession no in central pathology-Kiel: [NOT APPLICABLE]
Biopsy [FINE NEEDLE ASPIRATION / CORE BIOPSY / SURGICAL EXCISION / OTHER]
Slides or Blocks [SLIDES / BLOCKS]
Number of Slides Received [1-100]
Number of Blocks Received [1-10]
Specimen Quality [ADEQUATE / INADEQUATE / POOR / NO TISSUE LEFT]
Date of Final Diagnosis by Central Pathology [DDMMMYYYY]
Final Diagnosis by Central Pathology (including Adjudication) According to Modified WHO 2011
FL Grades [9695/3 - FL GRADE 1 / 9691/3 - FL GRADE 2 / 9698/3 - FL GRADE 3A / 9698/3 - FL GRADE 3B / UNGRADED]
Reason Why No Diagnosis Made [INSUFFICIENT MATERIAL FOR CENTRAL PATHOLOGY - QUINTILES TO REQUEST
        ANOTHER SAMPLE / UNSUITABLE MATERIAL - QUINTILES TO REQUEST ANOTHER SAMPLE / NO
        LYMPHOMA IN SPECIMEN - QUINTILES TO REQUEST ANOTHER SAMPLE / NOT APPLICABLE]
Eligible to Gallium [YES / NO / PENDING]
Discrepancy with Diagnosis of Local Pathology [YES - QUINTILES TO NOTIFY SITE / NO]
Adjudicated [YES / NO]
Discrepant Localization [YES - QUINTILES TO REQUEST ANOTHER SAMPLE / NO]
Bone Marrow Result-% of Lymphoma Involvement [ 0, 10, 20 .. 100 / NOT APPLICABLE]
CD3 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
CD5 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
CD10 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
CD20 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
CD23 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
CD79A [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
KI67 [0, 10, 20 .. 100 / NOT DONE / NOT EVALUABLE]
Cyclin D1 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
Bcl-2 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
Bcl-6 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
Foxp1 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
IRF4 [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
kappa [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
lambda [+ or - or +/- /  NOT DONE / NOT EVALUABLE]
t(14;18) Fusion [POSITIVE / NEGATIVE / NOT DONE / NOT EVALUABLE]
Bcl-2 Break-Apart [POSITIVE / NEGATIVE / NOT DONE / NOT EVALUABLE]
Bcl-6 Break-Apart [POSITIVE / NEGATIVE / NOT DONE / NOT EVALUABLE]
IgH Break-Apart [POSITIVE / NEGATIVE / NOT DONE / NOT EVALUABLE]
BioMed2 IgH Clonality [MONOCLONAL / OLIGOCLONAL / POLYCLONAL / NO MARKER / NOT DONE / NOT EVALUABLE]
BioMed2 IgH Clonality Length of Clonal Peak in Base Pairs (if IgH monoclonal) [0-1000 / NOT DONE]
RNA Extraction [YES / INSUFFICIENT MATERIAL / NO / PENDING]
RNA Purity OD 260/280 [n.nn / NOT APPLICABLE]
RNA Concentration (g/l) [0-999 / NOT APPLICABLE]
Number of RNA Aliquots [1-9 / NOT APPLICABLE]
RNA Volume per Aliquot in l [1-20 / NOT APPLICABLE]
Number of Sections for RNA Extraction [1-5 / NOT APPLICABLE]
DNA Extraction [YES / INSUFFICIENT MATERIAL / NO / PENDING]
DNA Purity OD 260/280 [n.nn / NOT APPLICABLE]
DNA concentration (g/l) [0-999 / NOT APPLICABLE]
Number of DNA Aliquots (1-9 or NOT APPLICABLE
DNA Volume per Aliquot in l [1-20 / NOT APPLICABLE]
Number of Sections for DNA Extraction [1-5 / NOT APPLICABLE]
TMA Taken? [YES / NO / PENDING]
Reason TMA Not Taken [INSUFFICIENT MATERIAL / NOT APPLICABLE]
Number of TMA Cores Taken [1 / 2]
Name of TMA  [GALLIUM-Leeds-LAB-number] respectively [GALLIUM-Kiel-LAB-number] - what ??
Position on TMA [1-50]
Sections Taken? [YES / NO / PENDING]
Reason Sections Not Taken [INSUFFICIENT MATERIAL / NOT APPLICABLE]
Number of Sections Taken (1-10)
