RSS Git Download  Clone
Raw Blame History
package LIMS::Controller::DataImport;

use base 'LIMS::Base';

use Moose;
with 'LIMS::Controller::Roles::DataFile';
has temp_files_sub_dir => ( is => 'rw', isa => 'Str' );
__PACKAGE__->meta->make_immutable(inline_constructor => 0);

use IO::All;
use Data::Dumper;
use File::Basename;
use LIMS::Local::Utils;
use LIMS::Local::ExcelHandler;
use Regexp::Common 'RE_num_ALL'; # all 'num' methods

use constant MAX_FILE_SIZE  => 50_000 * 1_024;  # Limit upload to 50MB
use constant TMP_DIR => 'src/data/tmp_files'; # relative path below app root

#-------------------------------------------------------------------------------
sub default : StartRunmode {
    my $self = shift;

    # shouldn't be called here - redirect to /
    $self->redirect( $self->query->url );
}

#-------------------------------------------------------------------------------
sub bcr_abl : Runmode {
    my $self = shift; $self->_debug_path($self->get_current_runmode);

    # if 1st call to method, or no file uploaded:
    $self->query->param('source') || return $self->tt_process(); # warn $self->query->param('source');

    # process zip file upload & get IO::All object for contents of upload/extract
    # dir (sets flash msg & returns empty on failure):
    my $io = $self->_process_upload('bcr_abl') || return $self->tt_process();

    # spreadsheet field defs:
    my %f = (
        WELL     => 0, # plate well id, eg A1-12, B1-12, etc
        LAB_REF  => 1, # lab_no & surname
        TARGET   => 2, # ABL / BCR ABL
        CT       => 6, # cycle threshold
        QUANTITY => 9, #         
    );
    
    my $xl = LIMS::Local::ExcelHandler->new(); # $self->debug($xl);
    
    my %data;
    
    CONTENT : foreach my $content ($io->all) {
        # skip non-XL files:
        next CONTENT unless $content->filename =~ /\.xls\Z/; # warn $content->filename;

        my $file = $content->name; # name() = path/to/file
        my $data = $xl->parse_xl_file($file); # warn Dumper $data; # AoA
            # open my $fh, '>data.txt'; print $fh Dumper $data;

        # skip file unless it's a BCR-ABL file:
        #next CONTENT unless $data->[2]->[1] =~ /Q922/; # Experiment File field

        ROW: for my $row (@$data) { # parse each row:
            # require labno & name in LAB_REF:
            my $sample_ref = $row->[ $f{LAB_REF} ] || next ROW; # warn $sample_ref;
            
            my ($labno, $surname) = $sample_ref =~ /(\d+\/\d{2}) (.*)/;
            next ROW unless $labno && $surname; # warn Dumper [ $labno, $surname ];

            $surname = lc LIMS::Local::Utils::trim($surname); # trim pre & post spaces

            # add surname entry (& check matches from previous rows):
            if ( my $existing = $data{$labno}{sample_name} ) { 
                $data{$labno}{error}++ unless $surname eq $existing;
                # warn Dumper [$surname , $existing];
            }
            else {
                $data{$labno}{sample_name} = $surname;
            }

            # get some data vals:
            my $quantity = $row->[ $f{QUANTITY} ]; # warn $quantity;  
            my $target   = $row->[ $f{TARGET} ];
            my $ct       = $row->[ $f{CT} ];

            # force quantity & CT values to 0 if CT not numerical (int or decimal):
            unless ( $ct =~ RE_num_real() ) { # warn $quantity;           
				$quantity = $ct = 0; 
			}

            # $data{$labno}{copy_number}{$target} += int $quantity; # using both vals now:
            # add ABL & BCR-ABL quantity values for each sample:
			push @{ $data{$labno}{copy_number}{$target} }, int $quantity;
			# add ABL & BCR-ABL cycle threshold values to CT array:
            push @{ $data{$labno}{CT}{$target} }, ( sprintf '%.2f', $ct );        
        }        
    }
    
    { # get request, patient data & existing result_summary for all samples in %data:
        # bcr abl is a Molecular section test:
        my $section = $self->model('LabSection')->get_lab_section_by_name('Molecular');

        LABNO: while ( my ($labno, $data) = each %data ) {
            my ($request_number, $year) = LIMS::Local::Utils::split_labno($labno);
            
            my $request = {};
            { # request & patient data:
                my @args = ( request_number => $request_number, year => $year );
            
                eval { # in case labno incorrect in data file:
                    $request
                        = $self->model('Request')->get_patient_and_request_data(\@args);
                };
                # return $self->error($@) if $@; # halt process, or: 
                next LABNO if $@; # skip so names mismatch highlighted in .tt
                $data->{request_data} = $request->as_tree; # warn Dumper $request->as_tree;
            }
            { # retrieve any existing result summary:
                my %args = (
                    request_id => $request->id,
                    section_id => $section->id,
                );
                my $existing_result
                    = $self->model('Result')->get_request_results_summary(\%args);
                $data->{existing_result} = $existing_result->as_tree; # warn Dumper $existing_result->as_tree;
            }
        }
    }

    # clean up:
    $self->_clean_up($io);

    return $self->tt_process('dataimport/bcr_abl_data.tt', { data => \%data });        
}

#-------------------------------------------------------------------------------
sub do_bcr_abl : Runmode { 
    my $self = shift; $self->_debug_path($self->get_current_runmode);

    my @results = $self->query->param('result'); # warn Dumper \@results;
    
    my $msgs = $self->messages('file_upload')->{bcr_abl};
    
    # bcr abl is a Molecular section test:
    my $section = $self->model('LabSection')->get_lab_section_by_name('Molecular');
    
    # auto-reportable config:
    my $rules = $self->get_yaml_file('bcr-abl') || {}; # in case doesn't exist   
    my $cfg   = $self->get_yaml_file('auto_reportable') || {}; # in case doesn't exist

    my $screening_term = $rules->{screening_term};
    # do we have auto_reportable data for screening_term ?
    my $does_auto_report = $cfg->{$screening_term} ? 1 : 0; # warn Dumper $does_auto_report;
    
    my %update_outcome;
    
    for my $row (@results) {
        my ($labno, $request_id, $abl, $bcr_abl, $ratio) = split '::', $row;
        
        my $result;
        
        if ($abl < 10_000) {
            $result = $msgs->{poor_quality}->{result};
        }
        else { # have ABL >= 10000
            # if ratio is 0 because BCR-ABL is 0, report sensitivity of assay:
            $ratio ||= sprintf '< %.4f', 100 / $abl;
            
            my %data = (
                BCR_ABL => $bcr_abl || 0, # can be empty
                ABL     => $abl,
                ratio   => $ratio,
            );
            my $tt = $self->tt_process('dataimport/imatinib.tt', \%data);
            $result = LIMS::Local::Utils::trim(${$tt}); # deref and remove tt spacing
        }

        # model requires section object, request_id & result:
        my %data = (
            request_id => $request_id,
            section    => $section,
            result     => $result,
        );
        
        # for auto-reporting:
        if ( $does_auto_report ) {
            my $config = LIMS::Local::Utils::clone($cfg); # deep clone to localise changes
            my $report_items = $config->{$screening_term}{report}; # warn Dumper $report_items;

            my $h = {}; # will hold local changes to report data eg diagnosis, comment, etc
            if ($abl < 10_000) {
                $h = $rules->{pcr_failed};
            }
            elsif (! $bcr_abl || $ratio <= 0.055) { # if no bcr-abl, $ratio is non-numerical
                $h = $rules->{major_molecular_response};
            }
            else { # $abl >= 10K, $bcl-abl > 0 and $ratio >0.055
                $h = $rules->{cml_treated};
            }
            # override existing cloned $cfg report vals:
            while ( my ($col, $val) = each %$h ) {
                $report_items->{$col} = $val; # or @{$report_items}{keys %$h} = values %$h
            }
            # add entire locally-modified cloned cfg file to %data:
            $data{auto_reportable_config} = $config; # warn Dumper $config;
            # also add lab-test section for lab-test sign-off:
            $data{lab_test_data} = $config->{$screening_term}{lab_test};
        } # warn Dumper $data{auto_reportable_config}; 

        my $rtn = $self->model('Result')->update_results_summary(\%data);  
        if ($rtn) { # model returns error on failure only
            $update_outcome{failure}{$labno} = $rtn; # supply error
        }
        else { # auto-report & supply result:
            $update_outcome{success}{$labno} = $result; 
        }
    } 
    
    return $self->tt_process({ data => \%update_outcome });
}

#-------------------------------------------------------------------------------
sub chimerism_prep : Runmode { 
    my $self = shift; $self->_debug_path($self->get_current_runmode);

	my @request_ids = $self->query->param('request_id');
	my $action = '/local_worklist?function_name=chimerism_worksheet';

	if (! @request_ids) {
		$self->flash(warning => $self->messages('worklist')->{no_data_submitted});
		return $self->redirect( $self->query->url . $action );
	}
	# get data on submitted request_id(s):
	my $data = $self->model('Local')->get_chimerism_data(\@request_ids); # hashref
	
	if (%$data) { # $self->debug($data);
		my $o = LIMS::Local::ExcelHandler->new(); # $self->debug($o);
		
		my $xls; # Spreadsheet::WriteExcel output 
		$o->initialise_fh(\$xls); # pass *ref* of container to ExcelHandler method 
		
		my %h; # container to hold data for write_excel_file() function
		
		my $n_sort = sub { LIMS::Local::Utils::n_sort(@_) };
		
		for my $id ( sort { $a <=> $b } keys %$data ) {
			my $req = $data->{$id};
			my $patient = $req->{patient_case}->{patient};
			
			my $hmds_ref = $req->{request_number} . '/' . ($req->{year} - 2000);
			my $name = sprintf '%s, %s', uc $patient->{last_name},
				join ', ', map { ucfirst $_ } grep { $_ }
					( $patient->{first_name}, $patient->{middle_name} );
			my $specimen = join ', ', @{ $req->{specimen} }; # should be 1
			my $baseline = $req->{baseline};
			my $lab_test = $req->{lab_test}; # arrayref
			my $previous = $req->{previous_result};

			# split on naturally sorted lab_tests:
			my @lab_tests = &$n_sort($lab_test); # warn Dumper \@lab_tests;
			for my $test(@lab_tests) { # add data to 1st & 2nd worksheets:
				push @{ $h{first} }, [ $hmds_ref, $name, $specimen, $test,
					$baseline, $previous ];			
				push @{ $h{second} }, [ $id, $hmds_ref, $name, $test ];
			}
		}

		my @data = (
			{ # 1st worksheet:
				name => undef, # sheet1 default OK
				data => $h{first}, # array(ref) of arrayrefs
				headers => [ 'hmds ref', 'name', 'sample', 'test',					
					'baseline data', 'previous result', ],
			},
			{ # 2nd worksheet:
				name => undef, # sheet2 default OK
				data => $h{second}, # array(ref) of arrayrefs
				headers => [
					'id', 'hmds ref', 'name', 'test',  'baseline chimerism data',
					"Donor\nD13S631", "Donor\nD13S258", "Donor\nD13S51",
					"Donor\nD13S851", "Patient D13S631", "Patient D13S258",
					"Patient D13S51", "Patient D13S851",
				],
				columns => { 4 => 30 }, # 'baseline chimerism data' header
				format  => ['text_wrap'],
			},
		);
		$o->write_excel_file(\@data); # formats @data into $xls output

		{ # create unique xl filename for headers:
			my $now = LIMS::Local::Utils::time_now; # warn Dumper $now;
			my $filename = sprintf 'chimerism-%s.xls', $now->epoch;					
			$self->header_props( -type => 'application/excel',
				-expires => 'now', -attachment => $filename );
		}
		binmode STDOUT; # apparently this is needed even for linux
		return $xls; 
	}
	else {
		$self->flash(warning => $self->messages('worklist')->{no_data_submitted});
		return $self->redirect( $self->query->url . $action );
	}
}

#-------------------------------------------------------------------------------
sub chimerism : Runmode {
    my $self = shift; $self->_debug_path($self->get_current_runmode);

    # if 1st call to method, or no file uploaded:
    $self->query->param('source') || return $self->tt_process(); # warn $self->query->param('source');

    # process zip file upload & get IO::All object for contents of upload/extract
    # dir (sets flash msg & returns empty on failure):
    my $io = $self->_process_upload('chimerism') || return $self->tt_process();

	# get file(s) from 'chimerism' upload dir - should only be one:
	my @files = map { $_->name }
		grep { $_->name =~ /chimerism-\d+.xls/ } $io->all; # warn Dumper \@files;
	# check only expected file in upload dir:
	unless ( scalar @files == 1 ) {
		my $msg = $self->messages('file_upload')->{chimerism}->{file_number};
		$self->flash( error => $msg );
		return $self->redirect( $self->query->url . '/data-import/chimerism' );
	}
	
    my $xl = LIMS::Local::ExcelHandler->new(); # $self->debug($xl);
    my $data = $xl->parse_xl_file($files[0], { sheet => 1 }); # warn Dumper $data; # AoA
	
	my $headers = shift @$data; # warn Dumper $headers;
	
	my $i = 0; # create spreadsheet field defs from header row (new-lines -> spaces):
	my %f = map { $_ =~ s/\n/ /g; $_ => $i++ } @$headers; # warn Dumper \%f;

	# get array of col positions containing patient & donor results:
	my @donor_cols   = map $f{$_}, grep { $_ =~ /donor/i } keys %f; # warn Dumper \@donor_cols;
	my @patient_cols = map $f{$_}, grep { $_ =~ /patient/i } keys %f; # warn Dumper \@patient_cols;
	
	my %h;
	for my $row (@$data) {
		my $id   = $row->[ $f{id}   ];
		my $test = $row->[ $f{test} ];

		my $baseline = $row->[ $f{'baseline chimerism data'} ];
		
		my %data = (); # re-initialise
		# populate %data with data from $row:
		@data{@$headers} = @$row;
		
		unless ($baseline) { # will not have donor & patient data
			# get donor & patient results - should be ONLY 1 of each, with matching id's:
			my (@donor_result_field)   = map $_, grep $row->[$_], @donor_cols;
			my (@patient_result_field) = map $_, grep $row->[$_], @patient_cols;
				# warn Dumper [@donor_result_field, @patient_result_field]; 
			# check only 1 of each:
			$data{data_ok} += (
				scalar @donor_result_field == 1 &&
				scalar @patient_result_field == 1
			);
			my ($patient_result_field) = @patient_result_field; # warn Dumper $patient_result_field;
			my ($donor_result_field) = @donor_result_field; # warn Dumper $donor_result_field;
			
			if ( $donor_result_field && $patient_result_field ) {
				$data{data_ok} += # check donor & patient data fields are 4-cols apart:
					( $patient_result_field == $donor_result_field + 4 );
				# add donor & patient data results:
				$data{donor_result}   = $row->[$donor_result_field];
				$data{patient_result} = $row->[$patient_result_field];
				# capture donor (or patient - it's the same) 'D' value:
				my ($marker_ref) = $headers->[$donor_result_field] =~ /Donor (.*)/;
				$data{marker_ref} = $marker_ref;
			}
		}
		
		$h{$id}{$test} = \%data;
	} # warn Dumper \%h;

    # clean up:
    $self->_clean_up($io);

    return $self->tt_process('dataimport/chimerism_data.tt', { data => \%h });
}

#-------------------------------------------------------------------------------
sub do_chimerism : Runmode {
    my $self = shift; $self->_debug_path($self->get_current_runmode);

    my @results = $self->query->param('result'); # warn Dumper \@results;
	
	my %h;
    for my $row (@results) { # warn Dumper $row;
        my ($request_id, $hmds_ref, $test, $baseline, $result) = split '~', $row;
			# warn Dumper [ $request_id, $test, $baseline, $result ];
		$h{$request_id}{hmds_ref} = $hmds_ref;
		# need CD3, CD15 & WBC results so create array for joining:
		push @{ $h{$request_id}{result} },
			$baseline || sprintf '%s = %s%%', $test, $result;
	} # warn Dumper \%h;
	
    # chimerism is a Molecular section test:
    my $section = $self->model('LabSection')->get_lab_section_by_name('Molecular');

    my $cfg = $self->get_yaml_file('auto_reportable') || {}; # in case doesn't exist	

	my %update_outcome; # to hold model returns 
	while ( my($request_id, $d) = each %h ) { # warn Dumper $d;
		my $hmds_ref = $d->{hmds_ref},
		my $result = join "\n", @{ $d->{result} };
		my $lab_test_data = do {
			my $o = $self->model('Screen')->get_initial_screen($request_id);
			my $presentation = $o->screen->description;
			$cfg->{$presentation}{lab_test};
		}; # warn Dumper $presentation;
		my %data = (
			auto_reportable_config => $cfg,
			lab_test_data => $lab_test_data,
			request_id => $request_id,
			section    => $section,
			result     => $result,
		);

	    my $rtn = $self->model('Result')->update_results_summary(\%data);  
		if ($rtn) { # model returns error on failure only
			$update_outcome{failure}{$hmds_ref} = $rtn; # supply error
		}
		else { # auto-report & supply result:
			$update_outcome{success}{$hmds_ref} = $result; 
		}
	}
	return $self->tt_process({ data => \%update_outcome });
}

#--------------------------- private methods -----------------------------------
sub _process_upload {
    my ($self, $sub_dir) = @_; $self->_debug_path();
    
    my $src_file = $self->query->upload('source'); # CGI method
    my $filename = fileparse($src_file); # warn $filename;

    # expect one of following suffixes:
    my ($suffix) = $filename =~ /(zip|xls)\Z/;

    unless ($suffix) { # zip or xls file
        $self->flash( error => $self->messages('file_upload')->{unsuitable_filetype} );
        return 0; 
    }
    
    # set sub-directory for upload file processing - all files deleted afterwards:
    $self->temp_files_sub_dir($sub_dir);

    my $files_dir = $self->_get_temp_files_dir;
    
    # params for process_file_upload() & LIMS::Local::Utils::unzip_file():
    my %args = (
        target_dir => $files_dir,
        src_file   => $src_file, # filename in scalar context, else fh
        filename   => $filename,
    );
    
    # write uploaded content to disk:
    $self->process_file_upload(\%args)
    || return $self->error( 'process_file_upload() did not return successfully' );    
    
    if ( $suffix eq 'zip' ) { # unzip if required:
        # method returns contents of zip file:
        my $zip_contents = LIMS::Local::Utils::unzip_file(\%args);
    
        # stuff zip_contents into stash:
        $self->stash->{zip_contents} = $zip_contents;
    }

    # return IO::All object of target files dir:
    if ( my $io = io($files_dir) ) {
        return $io;
    }
    else {
        $self->flash( error => 'could not read contents of upload directory' );
        return 0;
    }
}

# where to extract zip file:
sub _get_temp_files_dir {
    my $self = shift; $self->_debug_path();
    
    my $path_to_temp_files = join '/', 
        $self->cfg('path_to_app_root'), TMP_DIR , $self->temp_files_sub_dir;

    return $path_to_temp_files;
}

# delete all from unzip dir:
sub _clean_up {
    my ($self, $io) = @_; $self->_debug_path();
    
    foreach ($io->all) { # warn $_->name; # if $_->is_file;
        $_->unlink if $_->is_file;
    }    
}

1;