package NGS;
=begin
handles NGS data input from either Roche 454 .txt file or zipped Illumina MiSeq
vcf files. Pre-process data files to generate an Ensembl Variant Effect Predictor
(vep) input file. Sends vep input file to stand-alone vep script and post-process
output to generate html results table
=cut
# TODO: exons not sorted naturally; data supplied to tt as chr:location key, not exon
# export RDBO_DEBUG=1; bin/app.pl to switch on query debug
use Modern::Perl qw(2012); # 5.14
use autodie;
use FindBin qw($RealBin); # warn $RealBin;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use Archive::Extract; # $Archive::Extract::DEBUG = 1;
use Data::Dump qw(dump);
use File::Basename;
use Carp qw(croak);
use Data::Printer;
use Log::Report;
use Path::Tiny;
use IO::Scalar;
use Try::Tiny (); # to avoid clash with exported Log::Report::try()
use Template;
use DateTime;
use IO::All;
use Git;
use Dancer2;
use Dancer2::Plugin::LogReport;
use NGS::DB;
use NGS::VEP;
use NGS::VEP::BioSphere; # api_version()
# disable sql_trace if test script (needs explicit export SQL_TRACE=1 if req'd):
set enable_sql_trace => 0 if $ENV{HARNESS_VERSION};
set auto_page => 1;
# given & when moved to experimental in 5.18
no if $] >= 5.018, warnings => 'experimental::smartmatch';
# --- Globals ------------------------------------------------------------------
# get VEP version from Bio::EnsEMBL::ApiVersion::software_version:
use constant VEP_SRC_FILENAME => 'data_src'; # form param containing filename
use constant VEP_OUTPUT_TT => 'vep/result.tt'; # tmpl to handle vep results
use constant VEP_INPUT_TT => 'vep.tt'; # tmpl contains vep data input form
use constant API_VERSION => NGS::VEP::BioSphere->new->api_version;
my $repo = path($RealBin, '..', '.git')->realpath; # warn $repo;
use constant REPO => Git->repository( Directory => $repo );
our $VERSION = _app_version(); # warn $VERSION;
hook before => sub {
var app_version => $VERSION; # NGS app
var api_version => API_VERSION; # vep version
# set SQL_TRACE flag if configured:
$ENV{SQL_TRACE} = setting('enable_sql_trace');
$Local::QueryLogger::CurrentUsername = session->id;
};
#-------------------------------------------------------------------------------
hook before_template_render => sub {
my $tokens = shift;
$tokens->{nsort_labno} = sub {
my $list = shift || return; # warn dump $list;
return sort _sort_by_labno(@$list);
};
$tokens->{nsort_exon} = sub { # can't do this yet - key = chr:location not exon
my $list = shift || return; # warn dump $list;
return sort _sort_by_exon(@$list);
};
};
#-------------------------------------------------------------------------------
get '/' => sub { template 'index' }; # default index page
#-------------------------------------------------------------------------------
get '/vep' => sub {
debug 'GET upload page';
my $bio = NGS::VEP::BioSphere->new;
my $types = $bio->get_cache_types; # warn dump $types; # core, refseq, etc
my %dbs = map +($_ => 1), @$types; # cache types available to current api version
var cache => \%dbs;
template VEP_INPUT_TT, { title => 'Upload VEP DataFile' };
};
#-------------------------------------------------------------------------------
post '/vep' => sub {
debug 'POST upload page'; # debug "Params are : " . dump( request->params );
my %allparams = params; # debug dump \%allparams;
my $input = {
form_opts => \%allparams,
data_file => upload(VEP_SRC_FILENAME) || undef,
}; # warn dump $input->{data_file};
_validate_user_input($input); # debug 'Validation results: ' . dump($input);
#-- display any errors
if ( exists $input->{upload_error} ) { debug 'have upload error';
$input->{title} = 'Upload Data Error';
$input->{warning_message} = 'You must upload the file again!';
return template VEP_INPUT_TT, $input;
}
#-- passed validation
debug 'OK passed the error checks.';
{ # send submitted src data to vep script:
=begin # Log::Report::try method - need to adjust t/017_data.t
my $data;
try { # Log::Report::try
$data = _parse($input);
var results => $data;
};
$@->reportAll(reason => 'ERROR'); # issues a re-direct so .t fails
=cut
=begin # Try::Tiny::try method:
Try::Tiny::try {
my $data = _parse($input);
var results => $data;
}
Try::Tiny::catch { var err_msg => $_ }; # catch any fatals, return a 500.tt:
return template 500 if var 'err_msg'; # err_msg available to tt in vars
=cut
#=begin # Log::Report process() method:
my $data;
if ( process sub { $data = _parse($input) } ) {
var results => $data;
}
else { return template 'error.tt' }
#=cut
}
# delete temp file (if exists; auto-deleted for .t uploads):
unless ( $input->{data_file}->{temp_fh} ) { # only exists for .t uploads
if ( my $tmp_file = $input->{data_file}->tempname ) {
io($tmp_file)->unlink if -e $tmp_file;
}
}
return template VEP_OUTPUT_TT, { title => 'VEP Results' };
};
#-------------------------------------------------------------------------------
get '/download/:tbl/:file' => sub {
my $db = NGS::DB->new; # p session->id;
$db->session_id(session->id);
my $db_tbl = param('tbl'); # p $db_tbl;
my $data = $db->get_vep_data($db_tbl); # p $data;
# tab-delimit each col, new-line delimit each row:
my $content = do{
no warnings 'uninitialized'; # eg sift & polyphen
join "\n", map { join "\t", @$_ } @$data;
}; # p $content;
my $filename = join '_', DateTime->today->ymd(''), $db_tbl, param('file'); # p $filename;
{ # send_file fails() with filehandle, sending $content back as txt file now:
$filename .= '.txt' unless $filename =~ m!\.txt\Z!; # p $filename;
send_file( \$content, content_type => 'text/plain', filename => $filename );
}
=begin # filehandle method - fails in D2 0.161:
{
my $file = $db_tbl . '.txt';
my $zip = Archive::Zip->new();
my $string_member = $zip->addString( $content, $file );
$string_member->desiredCompressionMethod( COMPRESSION_DEFLATED );
my $ref; # scalar ref to convert to filehandle to return via send_file()
{ # writeToFileHandle:
my $fh = new IO::Scalar \$ref;
$zip->writeToFileHandle($fh) == AZ_OK
or die 'cannot writeToFileHandle()';
}
my $filename = join '_', DateTime->today->ymd(''), $db_tbl, param('file');
$filename .= '.zip' unless $filename =~ m!\.zip\Z!; # append .zip unless already
# to write file to disk - can't delete after send_file() though:
# unless ( $zip->writeToFileNamed('/tmp/'.$filename) == AZ_OK ) {
# die 'writeToFileNamed error';
# }
# send_file('/tmp/'.$filename, system_path => 1);
my @args = ( content_type => 'application/x-gzip', filename => $filename );
return send_file( \$ref, @args );
}
=cut
};
#-------------------------------------------------------------------------------
get '/gitlog' => sub {
my @args = ( '--pretty=format:"%ad :: %s"', '--date=relative' );
my @revs = REPO->command('log', @args );
template 'gitlog', { log => \@revs };
};
#-------------------------------------------------------------------------------
sub _parse {
my $args = shift; # warn dump $args->{form_opts};
my $data_file = $args->{data_file}; # warn dump $data_file;
my $filename = $data_file->{filename}; # warn $filename;
{ # add config to $args:
# add session id to config:
set session_id => session->id; # warn config->{session_id};
# add running environment for NGS::VEP (unless already set in Test::Common):
set environment => dancer_app->environment unless config->{environment};
$args->{config} = config; # p config;
}
my $vep = NGS::VEP->new($args);
# permitted file suffixes:
my @exts = qw(.txt .zip .vcf);
my ($dir, $name, $ext) = fileparse($filename, @exts); # warn dump [$dir, $name, $ext];
given ($ext) {
# single vcf file:
when ('.vcf') { # say "$filename is a vcf file";
# create file in /tmp for compatibility with .zip:
my $f = File::Spec->catfile('/tmp', $filename); # p $f;
$data_file->content > io($f);
my %h = (
# filename => $filename, content => $data_file->content, # needs much work in VEP
outdir => '/tmp',
files => [ $f ],
); # warn dump \%h;
return $vep->miseq_vcf(\%h);
}
# zipped vcf file or directory of vcf files:
when ('.zip') { # say "$filename is a zip file";
# get list of files and extraction path of archive:
my $ref = _unzip_file($data_file); # p $ref;
return $vep->miseq_vcf($ref);
}
# roche 454 text file:
when ('.txt') { # say "$filename is a text file";
return $vep->roche_454;
}
default { # return unknown filetype message to start page:
my ($suffix) = $filename =~ /\.(\w+)\Z/;
my $str = sprintf q!"%s" is an unknown file type (not txt, vcf or zip)!,
$suffix; # 'err' available to tt as param.err
forward '/vep', { err => $str }, { method => 'GET' };
}
}
}
#-------------------------------------------------------------------------------
sub _validate_user_input {
my $validation_report = shift;
if ( defined $validation_report->{data_file} ) {
debug 'At least there is a file uploaded.';
#------ Check that the file is a valid data file
$validation_report->{upload_error} = 'Not a valid data file!'
unless _validate_file( $validation_report->{data_file} );
}
else {
$validation_report->{upload_error} = 'No data file uploaded!';
}
}
#-------------------------------------------------------------------------------
sub _validate_file {
my $in_file = shift; # warn dump $in_file;
my $cfg = config->{InputFile}; # warn dump $cfg;
my $max_file_size = $cfg->{max_file_size} * 1024 * 1024; # MB
my $file_suffix = $cfg->{file_suffix};
return (
$in_file
&& ( lc $in_file->basename =~ /$file_suffix\Z/ )
&& ( $in_file->size <= $max_file_size )
);
}
#-------------------------------------------------------------------------------
sub _unzip_file {
my $data_file = shift; # warn dump $data_file;
my $tmp_file = $data_file->tempname; # warn $tmp_file;
# my $filename = $data_file->filename; # warn $filename; # don't need it
my $ae = Archive::Extract->new( archive => $tmp_file ); # warn $ae->type;
my $ok = $ae->extract( to => '/tmp' ); # warn dump $ok;
if ($ok) {
my $f = $ae->files; # arrayref of zip filenames, relative to unzip location
my @files = map { File::Spec->catfile('/tmp', $_) } @$f; # make absolute
# p $f; p @files;
my %h = (
outdir => $ae->extract_path,
files => \@files,
); # warn dump \%h;
io($tmp_file)->unlink; # delete zip; extracted dir deleted later
return \%h;
}
else { error $ae->error }
}
#-------------------------------------------------------------------------------
sub _sort_by_labno {
my ($y1, $r1) = split '_', $a; # warn Dumper [$r1, $y1];
my ($y2, $r2) = split '_', $b; # warn Dumper [$r2, $y2];
return $y1 <=> $y2 || $r1 <=> $r2;
}
#-------------------------------------------------------------------------------
sub _sort_by_exon { return $a cmp $b }
#-------------------------------------------------------------------------------
sub _app_version { REPO->command_oneline('rev-list', 'HEAD', '--count') }
true;