RSS Git Download  Clone
Raw Blame History
package NGS;

=begin
handles NGS data input from either Roche 454 .txt file or zipped Illumina MiSeq
vcf files. Pre-process data files to generate an Ensembl Variant Effect Predictor
(vep) input file. Sends vep input file to stand-alone vep script and post-process
output to generate html results table
=cut

# TODO: exons not sorted naturally; data supplied to tt as chr:location key, not exon

use Modern::Perl qw(2012); # 5.14
use autodie;

use Dancer2;
# use Dancer2::Core::Error; # still required ?

use Data::Dump qw(dump);
use Archive::Extract;
use Carp qw(croak);
use Template;
use IO::All;

use NGS::VEP;
use NGS::VEP::BioSphere; # api_version()

our $VERSION = _app_version(); # warn $VERSION;

# --- Globals ------------------------------------------------------------------
use constant VEP_SRC_FILENAME => 'data_src'; # form param containing filename
use constant VEP_INPUT_TT  => 'vep.tt'; # tmpl contains vep data input form
use constant VEP_OUTPUT_TT => 'vep/result.tt'; # tmpl to handle vep results
# get VEP version from Bio::EnsEMBL::ApiVersion::software_version:
use constant API_VERSION => NGS::VEP::BioSphere->api_version;

hook before => sub {
    var app_version => $VERSION; # NGS app
    var api_version => API_VERSION; # vep script
};

#-------------------------------------------------------------------------------
hook before_template_render => sub {
    my $tokens = shift;

    $tokens->{nsort_labno} = sub {
		my $list = shift || return; # warn dump $list;
		return sort _sort_by_labno(@$list);
	};
    $tokens->{nsort_exon} = sub { # can't do this yet - key = chr:location not exon
		my $list = shift || return; # warn dump $list;
		return sort _sort_by_exon(@$list);
	};
};

#-------------------------------------------------------------------------------
get '/' => sub { template 'index' }; # default index page

#-------------------------------------------------------------------------------
get '/vep' => sub {
    debug 'GET upload page';

    my $bio = NGS::VEP::BioSphere->new;
    my $types = $bio->get_cache_types; # warn dump $types; # core, refseq, etc
    my %dbs = map +($_ => 1), @$types; # cache types available to current api version

    var cache => \%dbs;
    
    template VEP_INPUT_TT, { title => 'Upload VEP DataFile' };
};

#-------------------------------------------------------------------------------
post '/vep' => sub {
    debug 'POST upload page'; # debug "Params are : " . dump( request->params );
    
    my %allparams = params; # debug dump \%allparams;
    
    my $input = {
        data_file => upload(VEP_SRC_FILENAME) || undef,
        form_opts => \%allparams,
    }; # warn dump $input->{data_file};
 
    _validate_user_input($input); # debug 'Validation results: ' . dump($input);

#-- display any errors
    if ( exists $input->{upload_error} ) {
        debug 'have upload error';
        return template VEP_INPUT_TT, {
            title => 'Upload Data Error',
            warning_message => 'You must upload the file again!',
            %$input,
        };
    }
    
#-- passed validation
    debug 'OK passed the error checks.';
    my $src  = $input->{data_file}; # debug $data;
    my $opts = $input->{form_opts};
    # add running environment for VEP debug rules:
    $opts->{environment} = dancer_app->environment; # warn $opts->{environment};

    # send submitted src data to vep script:
    my $data = _parse({ src => $src, opts => $opts });

    # delete temp file (if exists; auto-deleted for .t uploads):
    unless ( $input->{data_file}->{temp_fh} ) { # only exists for .t uploads
        if ( my $tmp_file = $input->{data_file}->tempname ) {
            io($tmp_file)->unlink if -e $tmp_file;
        }
    }

    return template VEP_OUTPUT_TT, { results => $data };
};

#-------------------------------------------------------------------------------
sub _parse {
    my $args = shift; # warn dump $args->{opts};
    
    my $form_opts = $args->{opts};
    my $src_data  = $args->{src};

    my $config = config; # warn dump $config;
    
    my %vep_args = (
        form_opts => $form_opts,
        src_data  => $src_data,
        config    => $config,
    ); # warn dump \%vep_args;
    
    my $vep = NGS::VEP->new(%vep_args);
    
    my $filename = $src_data->{filename}; # warn dump $file;

    # if zip file, extract (vcf) contents & process:
    if ( $filename =~ /zip\Z/ ) {
        # get list of files and extraction path of archive:
        my $ref = _unzip_file($src_data); # warn dump $extract; 
        return $vep->miseq_vcf($ref);
    }
    elsif ( $filename =~ /vcf\Z/ ) { # TODO: broken
        my $f = File::Spec->catfile('/tmp', $filename); # warn $f;
        my %h = (
            outdir => '/tmp',
            files  => [ $f ],
        );  warn dump \%h;
        return $vep->miseq_vcf(\%h);
    }
    else { # is one of permitted files types (.vep, etc):
        return $vep->roche_454; 
    }       
}

#-------------------------------------------------------------------------------
sub _validate_user_input {
    my $validation_report = shift;
 
    if ( defined $validation_report->{data_file} ) {
        debug 'At least there is a file uploaded.';
 
        #------ Check that the file is a valid data file
        $validation_report->{upload_error} = 'Not a valid data file!'
            unless _validate_file( $validation_report->{data_file} );
    }
    else {
        $validation_report->{upload_error} = 'No data file uploaded!';
    }
}

#-------------------------------------------------------------------------------
sub _validate_file {
    my $in_file = shift; # warn dump $in_file;
    
    my $cfg = config->{InputFile}; # warn dump $cfg;

    my $max_file_size = $cfg->{max_file_size} * 1024 * 1024; # MB
    my $file_suffix   = $cfg->{file_suffix};

    return (
        $in_file
        && ( lc $in_file->basename =~ /$file_suffix\Z/ )
        && ( $in_file->size <= $max_file_size )
    );
}

#-------------------------------------------------------------------------------
sub _unzip_file {
    my $src_data = shift; # warn dump $src_data;
    
    my $tmp_file = $src_data->tempname; # warn $tmp_file;
#    my $filename = $src_data->filename; # warn $filename; # don't need it
    
    my $ae = Archive::Extract->new( archive => $tmp_file );
    
    my $ok = $ae->extract( to => '/tmp' ); # warn dump $ok;
    
    if ($ok) {
        my $f = $ae->files; # arrayref of zip filenames, relative to unzip location
        my @files = map { File::Spec->catfile('/tmp', $_) } @$f; # make absolute
        
        my %h = (
            outdir => $ae->extract_path,
            files  => \@files,
        ); # warn dump \%h;
        io($tmp_file)->unlink; # delete zip; extracted dir deleted later
        return \%h;
    }
    else { die $ae->error }
}

#-------------------------------------------------------------------------------
sub _sort_by_labno {
	my ($y1, $r1) = split '_', $a; # warn Dumper [$r1, $y1];
	my ($y2, $r2) = split '_', $b; # warn Dumper [$r2, $y2];
	
	return $y1 <=> $y2 || $r1 <=> $r2;
}

#-------------------------------------------------------------------------------
sub _sort_by_exon {
	return $a cmp $b;
}

#-------------------------------------------------------------------------------
sub _app_version { return `git rev-list HEAD --count` }

true;