package LIMS::Controller::DataImport;
use base 'LIMS::Base';
use Moose;
with 'LIMS::Controller::Roles::DataFile';
has temp_files_sub_dir => ( is => 'rw', isa => 'Str' );
__PACKAGE__->meta->make_immutable(inline_constructor => 0);
use IO::All;
use Data::Dumper;
use File::Basename;
use LIMS::Local::Utils;
use LIMS::Local::ExcelHandler;
use Regexp::Common 'RE_num_ALL'; # all 'num' methods
use constant MAX_FILE_SIZE => 50_000 * 1_024; # Limit upload to 50MB
use constant TMP_DIR => 'src/data/tmp_files'; # relative path below app root
#-------------------------------------------------------------------------------
sub default : StartRunmode {
my $self = shift;
# shouldn't be called here - redirect to /
$self->redirect( $self->query->url );
}
#-------------------------------------------------------------------------------
sub bcr_abl : Runmode {
my $self = shift; $self->_debug_path($self->get_current_runmode);
# if 1st call to method, or no file uploaded:
$self->query->param('source') || return $self->tt_process(); # warn $self->query->param('source');
# process zip file upload & get IO::All object for contents of upload/extract
# dir (sets flash msg & returns empty on failure):
my $io = $self->_process_upload('bcr_abl') || return $self->tt_process();
# spreadsheet field defs:
my %f = (
WELL => 0, # plate well id, eg A1-12, B1-12, etc
LAB_REF => 1, # lab_no & surname
TARGET => 2, # ABL / BCR ABL
CT => 6, # cycle threshold
QUANTITY => 9, #
);
my $xl = LIMS::Local::ExcelHandler->new(); # $self->debug($xl);
my %data;
CONTENT : foreach my $content ($io->all) {
# skip non-XL files:
next CONTENT unless $content->filename =~ /\.xls\Z/; # warn $content->filename;
my $file = $content->name; # name() = path/to/file
my $data = $xl->parse_xl_file($file); # warn Dumper $data; # AoA
# open my $fh, '>data.txt'; print $fh Dumper $data;
# skip file unless it's a BCR-ABL file:
#next CONTENT unless $data->[2]->[1] =~ /Q922/; # Experiment File field
ROW: for my $row (@$data) { # parse each row:
# require labno & name in LAB_REF:
my $sample_ref = $row->[ $f{LAB_REF} ] || next ROW; # warn $sample_ref;
my ($labno, $surname) = $sample_ref =~ /(\d+\/\d{2}) (.*)/;
next ROW unless $labno && $surname; # warn Dumper [ $labno, $surname ];
$surname = lc LIMS::Local::Utils::trim($surname); # trim pre & post spaces
# add surname entry (& check matches from previous rows):
if ( my $existing = $data{$labno}{sample_name} ) {
$data{$labno}{error}++ unless $surname eq $existing;
# warn Dumper [$surname , $existing];
}
else {
$data{$labno}{sample_name} = $surname;
}
# get some data vals:
my $quantity = $row->[ $f{QUANTITY} ]; # warn $quantity;
my $target = $row->[ $f{TARGET} ];
my $ct = $row->[ $f{CT} ];
# force quantity & CT values to 0 if CT not numerical (int or decimal):
unless ( $ct =~ RE_num_real() ) { # warn $quantity;
$quantity = $ct = 0;
}
# $data{$labno}{copy_number}{$target} += int $quantity; # using both vals now:
# add ABL & BCR-ABL quantity values for each sample:
push @{ $data{$labno}{copy_number}{$target} }, int $quantity;
# add ABL & BCR-ABL cycle threshold values to CT array:
push @{ $data{$labno}{CT}{$target} }, ( sprintf '%.2f', $ct );
}
}
{ # get request, patient data & existing result_summary for all samples in %data:
my $section = do { # get lab-section for molecular_section_name:
my $section_name = $self->cfg('settings')->{molecular_section_name};
$self->model('LabSection')->get_lab_section_by_name($section_name);
};
LABNO: while ( my ($labno, $data) = each %data ) {
my ($request_number, $year) = LIMS::Local::Utils::split_labno($labno);
my $request = {};
{ # request & patient data:
my @args = ( request_number => $request_number, year => $year );
eval { # in case labno incorrect in data file:
$request = $self->model('Request')
->get_patient_and_request_data(\@args);
};
# return $self->error($@) if $@; # halt process, or:
next LABNO if $@; # skip so names mismatch highlighted in .tt
$data->{request_data} = $request->as_tree; # warn Dumper $request->as_tree;
}
{ # retrieve any existing result summary:
my %args = (
request_id => $request->id,
section_id => $section->id,
);
my $existing_result = $self->model('Result')
->get_request_results_summary(\%args);
$data->{existing_result} = $existing_result->as_tree; # warn Dumper $existing_result->as_tree;
}
}
}
# clean up:
$self->_clean_up($io);
return $self->tt_process('dataimport/bcr_abl_data.tt', { data => \%data });
}
#-------------------------------------------------------------------------------
sub do_bcr_abl : Runmode {
my $self = shift; $self->_debug_path($self->get_current_runmode);
my @results = $self->query->param('result'); # warn Dumper \@results;
my $msgs = $self->messages('file_upload')->{bcr_abl};
# get lab-section for molecular_section_name:
my $section = do {
my $section_name = $self->cfg('settings')->{molecular_section_name};
$self->model('LabSection')->get_lab_section_by_name($section_name);
};
# auto-reportable config:
my $rules = $self->get_yaml_file('bcr-abl') || {}; # in case doesn't exist
my $cfg = $self->get_yaml_file('auto_reportable') || {}; # in case doesn't exist
my $screening_term = $rules->{screening_term};
# do we have auto_reportable data for screening_term ?
my $does_auto_report = $cfg->{$screening_term} ? 1 : 0; # warn Dumper $does_auto_report;
my %update_outcome;
for my $row (@results) {
my ($labno, $request_id, $abl, $bcr_abl, $ratio) = split '::', $row;
my $result;
if ($abl < 10_000) {
$result = $msgs->{poor_quality}->{result};
}
else { # have ABL >= 10000
# if ratio is 0 because BCR-ABL is 0, report sensitivity of assay:
$ratio ||= sprintf '< %.4f', 100 / $abl;
my %data = (
BCR_ABL => $bcr_abl || 0, # can be empty
ABL => $abl,
ratio => $ratio,
);
my $tt = $self->tt_process('dataimport/imatinib.tt', \%data);
$result = LIMS::Local::Utils::trim(${$tt}); # deref and remove tt spacing
}
# model requires section object, request_id & result:
my %data = (
request_id => $request_id,
section => $section,
result => $result,
);
# for auto-reporting:
if ( $does_auto_report ) {
my $config = LIMS::Local::Utils::clone($cfg); # deep clone to localise changes
my $report_items = $config->{$screening_term}{report}; # warn Dumper $report_items;
my $h = {}; # will hold local changes to report data eg diagnosis, comment, etc
if ($abl < 10_000) {
$h = $rules->{pcr_failed};
}
elsif (! $bcr_abl || $ratio <= 0.055) { # if no bcr-abl, $ratio is non-numerical
$h = $rules->{major_molecular_response};
}
else { # $abl >= 10K, $bcl-abl > 0 and $ratio >0.055
$h = $rules->{cml_treated};
}
# override existing cloned $cfg report vals:
while ( my ($col, $val) = each %$h ) {
$report_items->{$col} = $val; # or @{$report_items}{keys %$h} = values %$h
}
# add entire locally-modified cloned cfg file to %data:
$data{auto_reportable_config} = $config; # warn Dumper $config;
# also add lab-test section for lab-test sign-off:
$data{lab_test_data} = $config->{$screening_term}{lab_test};
} # warn Dumper $data{auto_reportable_config};
my $rtn = $self->model('Result')->update_results_summary(\%data);
if ($rtn) { # model returns error on failure only
$update_outcome{failure}{$labno} = $rtn; # supply error
}
else { # auto-report & supply result:
$update_outcome{success}{$labno} = $result;
}
}
return $self->tt_process({ data => \%update_outcome });
}
#-------------------------------------------------------------------------------
sub chimerism_prep : Runmode {
my $self = shift; $self->_debug_path($self->get_current_runmode);
my @request_ids = $self->query->param('request_id');
my $action = '/local_worklist?function_name=chimerism_worksheet';
if (! @request_ids) {
$self->flash(warning => $self->messages('worklist')->{no_data_submitted});
return $self->redirect( $self->query->url . $action );
}
# get data on submitted request_id(s):
my $data = $self->model('Local')->get_chimerism_data(\@request_ids); # hashref
if (%$data) { # $self->debug($data);
my $xls; # for Spreadsheet::WriteExcel output - supplied as *scalarref*
my $o = LIMS::Local::ExcelHandler->new({ scalarref => \$xls });
my %h; # to hold worksheets 1 & 2 data for write_excel_file() function
my $n_sort = sub { LIMS::Local::Utils::n_sort(@_) };
for my $id ( sort { $a <=> $b } keys %$data ) {
my $req = $data->{$id};
my $patient = $req->{patient_case}->{patient};
my $hmds_ref = $req->{request_number} . '/' . ($req->{year} - 2000);
my $name = sprintf '%s, %s', uc $patient->{last_name},
join ', ', map { ucfirst $_ } grep { $_ }
( $patient->{first_name}, $patient->{middle_name} );
my $specimen = join ', ', @{ $req->{specimen} }; # should be 1
my $baseline = $req->{baseline};
my $lab_test = $req->{lab_test}; # arrayref
my $previous = $req->{previous_result};
my $notes = $patient->{patient_note}
? $patient->{patient_note}->{detail} : '';
# split on naturally sorted lab_tests:
my @lab_tests = &$n_sort($lab_test); # warn Dumper \@lab_tests;
for my $test(@lab_tests) { # add data to 1st & 2nd worksheets:
push @{ $h{first} }, [ $hmds_ref, $name, $specimen, $test,
$baseline, $previous, $notes ];
push @{ $h{second} }, [ $id, $hmds_ref, $name, $test ];
}
}
my @data = (
{ # 1st worksheet:
name => undef, # sheet1 default OK
data => $h{first}, # array(ref) of arrayrefs
headers => [ 'labno', 'name', 'sample', 'test',
'baseline data', 'previous result', 'notes' ],
columns => { 1 => 20, 3 => 15, 4 => 30, 5 => 32, 6 => 32 }, # fix width
format => ['text_wrap'],
},
{ # 2nd worksheet:
name => undef, # sheet2 default OK
data => $h{second}, # array(ref) of arrayrefs
headers => [
'id', 'labno', 'name', 'test', 'baseline chimerism data',
"Donor\nD13s631", "Donor\nD18s851", "Donor\nD13s258",
"Donor\nD18s51", "Patient D13s631", "Patient D18s851",
"Patient D13s258", "Patient D18s51",
],
columns => { 2 => 20, 3 => 15, 4 => 30 }, # fix width
format => ['text_wrap'],
},
);
$o->write_excel_file(\@data); # formats @data into $xls output
{ # create unique xl filename for headers:
my $now = LIMS::Local::Utils::time_now; # warn Dumper $now;
my $date_str = $now->strftime('%Y-%m-%d-%H%M%S');
my $filename = sprintf 'chimerism-%s.xls', $date_str;
$self->header_props( -type => 'application/excel',
-expires => 'now', -attachment => $filename );
}
binmode STDOUT; # apparently this is needed even for linux
return $xls;
}
else {
$self->flash(warning => $self->messages('worklist')->{no_data_submitted});
return $self->redirect( $self->query->url . $action );
}
}
#-------------------------------------------------------------------------------
sub chimerism : Runmode {
my $self = shift; $self->_debug_path($self->get_current_runmode);
# if 1st call to method, or no file uploaded:
$self->query->param('source') || return $self->tt_process(); # warn $self->query->param('source');
# process zip file upload & get IO::All object for contents of upload/extract
# dir (sets flash msg & returns empty on failure):
my $io = $self->_process_upload('chimerism') || return $self->tt_process();
# get file(s) from 'chimerism' upload dir - should only be one:
my @files = map { $_->name } # eg chimerism-2014-02-28-150130.xls
grep { $_->name =~ /chimerism-[\d\-]+.xls/ } $io->all; # warn Dumper \@files;
# check only expected file in upload dir:
unless ( scalar @files == 1 ) {
my $msg = $self->messages('file_upload')->{chimerism}->{file_number};
$self->flash( error => $msg );
return $self->redirect( $self->query->url . '/data-import/chimerism' );
}
my $xl = LIMS::Local::ExcelHandler->new(); # $self->debug($xl);
my $data = $xl->parse_xl_file($files[0], { sheet => 1 }); # warn Dumper $data; # AoA
my $headers = shift @$data; # warn Dumper $headers;
my $i = 0; # create spreadsheet field defs from header row (new-lines -> spaces):
my %f = map { $_ =~ s/\n/ /g; $_ => $i++ } @$headers; # warn Dumper \%f;
# get array of col positions containing patient & donor results:
my @donor_cols = map $f{$_}, grep { $_ =~ /donor/i } keys %f; # warn Dumper \@donor_cols;
my @patient_cols = map $f{$_}, grep { $_ =~ /patient/i } keys %f; # warn Dumper \@patient_cols;
my %h;
for my $row (@$data) {
my $id = $row->[ $f{id} ];
my $test = $row->[ $f{test} ];
my $baseline = $row->[ $f{'baseline chimerism data'} ];
my %data = (); # re-initialise
# populate %data with data from $row:
@data{@$headers} = @$row;
unless ($baseline) { # will not have donor & patient data
# get donor & patient results - should be ONLY 1 of each, with matching id's:
my (@donor_result_field) = map $_, grep { # using defined in case it's zero
defined $row->[$_] && $row->[$_] =~ /\d+/ } @donor_cols;
my (@patient_result_field) = map $_, grep { # using defined in case it's zero
defined $row->[$_] && $row->[$_] =~ /\d+/ } @patient_cols;
# warn Dumper [@donor_result_field, @patient_result_field];
# if both donor & patient result submitted:
if ( @donor_result_field && @patient_result_field ) {
# check only 1 of each:
$data{data_ok} += (
scalar @donor_result_field == 1 &&
scalar @patient_result_field == 1
);
my ($patient_result_field) = @patient_result_field; # warn Dumper $patient_result_field;
my ($donor_result_field) = @donor_result_field; # warn Dumper $donor_result_field;
$data{data_ok} += # check donor & patient data fields are 4-cols apart:
( $patient_result_field == $donor_result_field + 4 );
# add donor & patient data results:
$data{donor_result} = $row->[$donor_result_field];
$data{patient_result} = $row->[$patient_result_field];
# fix for submitting a donor result if patient result is zero:
$data{data_ok} = 0 if defined $data{donor_result} # including 0
&& ! $data{patient_result}; # ie patient_result = 0
# capture donor (or patient - it's the same) 'D' value:
my ($marker_ref) = $headers->[$donor_result_field] =~ /Donor (.*)/;
$data{marker_ref} = $marker_ref;
}
# if only patient submitted (expected value is zero = 100% result):
elsif ( @patient_result_field && ! @donor_result_field ) {
my ($patient_result_field) = @patient_result_field;
my $result = $row->[$patient_result_field]; # warn $result;
if ( defined $result && ! $result ) { # ie it's zero:
$data{patient_result} = $result;
$data{donor_result} = 1; # will give result = 100 * 1 / 1 + 0
$data{data_ok} = 2; # flag for template
}
}
}
$h{$id}{$test} = \%data;
} # warn Dumper \%h;
# clean up:
$self->_clean_up($io);
return $self->tt_process('dataimport/chimerism_data.tt', { data => \%h });
}
#-------------------------------------------------------------------------------
sub do_chimerism : Runmode {
my $self = shift; $self->_debug_path($self->get_current_runmode);
my @results = $self->query->param('result'); # warn Dumper \@results;
my %h;
for my $row (@results) { # warn Dumper $row;
my ($request_id, $hmds_ref, $test, $baseline, $marker, $result)
= split '~', $row; # warn Dumper [ $request_id, $test, $marker, $baseline, $result ];
# modify baseline string if exists:
if ( my $str = $baseline ) {
$baseline = 'Baseline chimerism data: ' . $str;
} # warn $baseline;
$h{$request_id}{marker} = $marker;
$h{$request_id}{hmds_ref} = $hmds_ref;
# need CD3, CD15 & WBC results so create array for joining:
push @{ $h{$request_id}{result} },
$baseline || sprintf '%s = %s%%', $test, $result;
} # warn Dumper \%h;
# chimerism is a Molecular section test:
my $section = $self->model('LabSection')->get_lab_section_by_name('Molecular');
my $cfg = $self->get_yaml_file('auto_reportable') || {}; # in case doesn't exist
my %update_outcome; # to hold model returns
while ( my($request_id, $d) = each %h ) { # warn Dumper $d;
my $hmds_ref = $d->{hmds_ref},
my $result = join "\n", @{ $d->{result} };
# insert 'donor' into result:
$result =~ s/(CD\d+|WBC) (chimerism)/$1 donor $2/g; # warn $result;
if ( my $marker = $d->{marker} ) { # append marker if exists onto result:
$result .= sprintf ' [%s]', $marker;
}
my $lab_test_data = do {
my $o = $self->model('Screen')->get_initial_screen($request_id);
my $presentation = $o->screen->description; # warn Dumper $presentation;
$cfg->{$presentation}{lab_test};
}; # warn Dumper $lab_test_data;
my %data = (
auto_reportable_config => $cfg,
lab_test_data => $lab_test_data,
request_id => $request_id,
section => $section,
result => $result,
); # warn Dumper $data;
my $rtn = $self->model('Result')->update_results_summary(\%data);
if ($rtn) { # model returns error on failure only
$update_outcome{failure}{$hmds_ref} = $rtn; # supply error
}
else { # auto-report & supply result:
$update_outcome{success}{$hmds_ref} = $result;
}
}
return $self->tt_process({ data => \%update_outcome });
}
#--------------------------- private methods -----------------------------------
sub _process_upload {
my ($self, $sub_dir) = @_; $self->_debug_path();
my $src_file = $self->query->upload('source'); # CGI method
my $filename = fileparse($src_file); # warn $filename;
# expect one of following suffixes:
my ($suffix) = $filename =~ /(zip|xls)\Z/;
unless ($suffix) { # zip or xls file
$self->flash( error => $self->messages('file_upload')->{unsuitable_filetype} );
return 0;
}
# set sub-directory for upload file processing - all files deleted afterwards:
$self->temp_files_sub_dir($sub_dir);
my $files_dir = $self->_get_temp_files_dir;
# params for process_file_upload() & LIMS::Local::Utils::unzip_file():
my %args = (
target_dir => $files_dir,
src_file => $src_file, # filename in scalar context, else fh
filename => $filename,
);
# write uploaded content to disk:
$self->process_file_upload(\%args)
|| return $self->error( 'process_file_upload() did not return successfully' );
if ( $suffix eq 'zip' ) { # unzip if required:
# method returns contents of zip file:
my $zip_contents = LIMS::Local::Utils::unzip_file(\%args);
# stuff zip_contents into stash:
$self->stash->{zip_contents} = $zip_contents;
}
# return IO::All object of target files dir:
if ( my $io = io($files_dir) ) {
return $io;
}
else {
$self->flash( error => 'could not read contents of upload directory' );
return 0;
}
}
# where to extract zip file:
sub _get_temp_files_dir {
my $self = shift; $self->_debug_path();
my $path_to_temp_files = join '/',
$self->cfg('path_to_app_root'), TMP_DIR , $self->temp_files_sub_dir;
return $path_to_temp_files;
}
# delete all from unzip dir:
sub _clean_up {
my ($self, $io) = @_; $self->_debug_path();
foreach ($io->all) { # warn $_->name; # if $_->is_file;
$_->unlink if $_->is_file;
}
}
1;