package NGS;
use Modern::Perl qw(2012); # 5.14
use autodie;
=begin
handles NGS data input from either Roche 454 .txt file or zipped Illumina MiSeq
vcf files. Pre-process data files to generate an Ensembl Variant Effect Predictor
(vep) input file. Sends vep input file to stand-alone vep script and post-process
output to generate html results table
=cut
# TODO: exons not sorted naturally; data supplied to tt as chr:location key, not exon
use Dancer2;
use Dancer2::Core::Error;
use Archive::Extract;
use Data::Dump qw(dump);
use Carp qw(croak);
use Template;
use IO::All;
use autodie;
use NGS::VEP;
our $VERSION = '0.1';
# --- Globals ------------------------------------------------------------------
use constant VEP_SRC_FILENAME => 'data_src'; # form param containing filename
use constant VEP_INPUT_TT => 'vep.tt'; # tmpl contains vep data input form
use constant VEP_OUTPUT_TT => 'vep/result.tt'; # tmpl to handle vep results
#-------------------------------------------------------------------------------
hook before_template_render => sub {
my $tokens = shift;
$tokens->{nsort_labno} = sub {
my $list = shift || return; # warn dump $list;
return sort _sort_by_labno(@$list);
};
$tokens->{nsort_exon} = sub { # can't do this yet - key = chr:location not exon
my $list = shift || return; # warn dump $list;
return sort _sort_by_exon(@$list);
};
};
#-------------------------------------------------------------------------------
get '/' => sub { template 'index' }; # default index page
#-------------------------------------------------------------------------------
get '/vep' => sub {
debug 'GET upload page';
template VEP_INPUT_TT, { title => 'Upload VEP DataFile' };
};
#-------------------------------------------------------------------------------
post '/vep' => sub {
debug 'POST upload page'; # debug "Params are : " . dump( request->params );
my %allparams = params; # debug dump \%allparams;
my $input = {
data_file => upload(VEP_SRC_FILENAME) || undef,
form_opts => \%allparams,
}; # warn dump $input->{data_file};
_validate_user_input($input); # debug 'Validation results: ' . dump($input);
#-- display any errors
if ( exists $input->{upload_error} ) {
debug 'have upload error';
return template VEP_INPUT_TT, {
title => 'Upload Data Error',
warning_message => 'You must upload the file again!',
%$input,
};
}
#-- passed validation
debug 'OK passed the error checks.';
my $src = $input->{data_file}; # debug $data;
my $opts = $input->{form_opts};
# add running environment for VEP debug rules:
$opts->{environment} = dancer_app->environment; # warn $opts->{environment};
# send submitted src data to vep script:
my $data = _parse({ src => $src, opts => $opts });
# delete temp file (if exists; auto-deleted for .t uploads):
unless ( $input->{data_file}->{temp_fh} ) { # only exists for .t uploads
if ( my $tmp_file = $input->{data_file}->tempname ) {
io($tmp_file)->unlink if -e $tmp_file;
}
}
return template VEP_OUTPUT_TT, { results => $data };
};
#-------------------------------------------------------------------------------
sub _parse {
my $args = shift; # warn dump $args->{opts};
my $form_opts = $args->{opts};
my $src_data = $args->{src};
my $config = config; # warn dump $config;
my %vep_args = (
form_opts => $form_opts,
src_data => $src_data,
config => $config,
); # warn dump \%vep_args;
my $vep = NGS::VEP->new(%vep_args);
my $filename = $src_data->{filename}; # warn dump $file;
# if zip file, extract (vcf) contents & process:
if ( $filename =~ /zip\Z/ ) {
# get list of files and extraction path of archive:
my $ref = _unzip_file($src_data); # warn dump $extract;
return $vep->miseq_vcf($ref);
}
## elsif ( $filename =~ /vcf\Z/ ) { # TODO: broken
# my $f = File::Spec->catfile('/tmp', io($file)->filename); warn $f;
# my %h = (
# outdir => '/tmp',
# files => [ $f ],
# ); # warn dump \%h;
# return $vep->miseq_vcf(\%h);
# }
else { # is one of permitted files types (.vep, etc):
return $vep->roche_454;
}
}
#-------------------------------------------------------------------------------
sub _validate_user_input {
my $validation_report = shift;
if ( defined $validation_report->{data_file} ) {
debug 'At least there is a file uploaded.';
#------ Check that the file is a valid data file
$validation_report->{upload_error} = 'Not a valid data file!'
unless _validate_file( $validation_report->{data_file} );
}
else {
$validation_report->{upload_error} = 'No data file uploaded!';
}
}
#-------------------------------------------------------------------------------
sub _validate_file {
my $in_file = shift; # warn dump $in_file;
my $cfg = config->{InputFile}; # warn dump $cfg;
my $max_file_size = $cfg->{max_file_size} * 1024 * 1024; # MB
my $file_suffix = $cfg->{file_suffix};
return (
$in_file
&& ( lc $in_file->basename =~ /$file_suffix\Z/ )
&& ( $in_file->size <= $max_file_size )
);
}
#-------------------------------------------------------------------------------
sub _unzip_file {
my $src_data = shift; # warn dump $src_data;
my $tmp_file = $src_data->tempname; # warn $tmp_file;
# my $filename = $src_data->filename; # warn $filename; # don't need it
my $ae = Archive::Extract->new( archive => $tmp_file );
my $ok = $ae->extract( to => '/tmp' ); # warn dump $ok;
if ($ok) {
my $f = $ae->files; # arrayref of zip filenames, relative to unzip location
my @files = map { File::Spec->catfile('/tmp', $_) } @$f; # make absolute
my %h = (
outdir => $ae->extract_path,
files => \@files,
); # warn dump \%h;
io($tmp_file)->unlink; # delete zip; extracted dir deleted later
return \%h;
}
else { die $ae->error }
}
#-------------------------------------------------------------------------------
sub _sort_by_labno {
my ($y1, $r1) = split '_', $a; # warn Dumper [$r1, $y1];
my ($y2, $r2) = split '_', $b; # warn Dumper [$r2, $y2];
return $y1 <=> $y2 || $r1 <=> $r2;
}
#-------------------------------------------------------------------------------
sub _sort_by_exon {
return $a cmp $b;
}
true;