package NGS; =begin handles NGS data input from either Roche 454 .txt file or zipped Illumina MiSeq vcf files. Pre-process data files to generate an Ensembl Variant Effect Predictor (vep) input file. Sends vep input file to stand-alone vep script and post-process output to generate html results table =cut # TODO: exons not sorted naturally; data supplied to tt as chr:location key, not exon # export RDBO_DEBUG=1; bin/app.pl to switch on query debug use Modern::Perl qw(2012); # 5.14 use autodie; use FindBin qw($RealBin); # warn $RealBin; use Archive::Zip qw( :ERROR_CODES :CONSTANTS ); use Archive::Extract; # $Archive::Extract::DEBUG = 1; use Data::Dump qw(dump); use File::Basename; use Carp qw(croak); use Data::Printer; use Log::Report; use Path::Tiny; use IO::Scalar; use Try::Tiny (); # to avoid clash with exported Log::Report::try() use Template; use DateTime; use IO::All; use Git; use Dancer2; use Dancer2::Plugin::LogReport; use NGS::DB; use NGS::VEP; use NGS::VEP::BioSphere; # api_version() # disable sql_trace if test script (needs explicit export SQL_TRACE=1 if req'd): set enable_sql_trace => 0 if $ENV{HARNESS_VERSION}; set auto_page => 1; # given & when moved to experimental in 5.18 no if $] >= 5.018, warnings => 'experimental::smartmatch'; # --- Globals ------------------------------------------------------------------ # get VEP version from Bio::EnsEMBL::ApiVersion::software_version: use constant VEP_SRC_FILENAME => 'data_src'; # form param containing filename use constant VEP_OUTPUT_TT => 'vep/result.tt'; # tmpl to handle vep results use constant VEP_INPUT_TT => 'vep.tt'; # tmpl contains vep data input form use constant API_VERSION => NGS::VEP::BioSphere->new->api_version; my $repo = path($RealBin, '..', '.git')->realpath; # warn $repo; use constant REPO => Git->repository( Directory => $repo ); our $VERSION = _app_version(); # warn $VERSION; hook before => sub { var app_version => $VERSION; # NGS app var api_version => API_VERSION; # vep version # set SQL_TRACE flag if configured: $ENV{SQL_TRACE} = setting('enable_sql_trace'); $Local::QueryLogger::CurrentUsername = session->id; }; #------------------------------------------------------------------------------- hook before_template_render => sub { my $tokens = shift; $tokens->{nsort_labno} = sub { my $list = shift || return; # warn dump $list; return sort _sort_by_labno(@$list); }; $tokens->{nsort_exon} = sub { # can't do this yet - key = chr:location not exon my $list = shift || return; # warn dump $list; return sort _sort_by_exon(@$list); }; }; #------------------------------------------------------------------------------- get '/' => sub { template 'index' }; # default index page #------------------------------------------------------------------------------- get '/vep' => sub { debug 'GET upload page'; my $bio = NGS::VEP::BioSphere->new; my $types = $bio->get_cache_types; # warn dump $types; # core, refseq, etc my %dbs = map +($_ => 1), @$types; # cache types available to current api version var cache => \%dbs; template VEP_INPUT_TT, { title => 'Upload VEP DataFile' }; }; #------------------------------------------------------------------------------- post '/vep' => sub { debug 'POST upload page'; # debug "Params are : " . dump( request->params ); my %allparams = params; # debug dump \%allparams; my $input = { form_opts => \%allparams, data_file => upload(VEP_SRC_FILENAME) || undef, }; # warn dump $input->{data_file}; _validate_user_input($input); # debug 'Validation results: ' . dump($input); #-- display any errors if ( exists $input->{upload_error} ) { debug 'have upload error'; $input->{title} = 'Upload Data Error'; $input->{warning_message} = 'You must upload the file again!'; return template VEP_INPUT_TT, $input; } #-- passed validation debug 'OK passed the error checks.'; { # send submitted src data to vep script: =begin # Log::Report::try method - need to adjust t/017_data.t my $data; try { # Log::Report::try $data = _parse($input); var results => $data; }; $@->reportAll(reason => 'ERROR'); # issues a re-direct so .t fails =cut =begin # Try::Tiny::try method: Try::Tiny::try { my $data = _parse($input); var results => $data; } Try::Tiny::catch { var err_msg => $_ }; # catch any fatals, return a 500.tt: return template 500 if var 'err_msg'; # err_msg available to tt in vars =cut #=begin # Log::Report process() method: my $data; if ( process sub { $data = _parse($input) } ) { var results => $data; } else { return template 'error.tt' } #=cut } # delete temp file (if exists; auto-deleted for .t uploads): unless ( $input->{data_file}->{temp_fh} ) { # only exists for .t uploads if ( my $tmp_file = $input->{data_file}->tempname ) { io($tmp_file)->unlink if -e $tmp_file; } } return template VEP_OUTPUT_TT, { title => 'VEP Results' }; }; #------------------------------------------------------------------------------- get '/download/:tbl/:file' => sub { my $db = NGS::DB->new; # p session->id; $db->session_id(session->id); my $db_tbl = param('tbl'); # p $db_tbl; my $data = $db->get_vep_data($db_tbl); # p $data; # tab-delimit each col, new-line delimit each row: my $content = do{ no warnings 'uninitialized'; # eg sift & polyphen join "\n", map { join "\t", @$_ } @$data; }; # p $content; my $filename = join '_', DateTime->today->ymd(''), $db_tbl, param('file'); # p $filename; { # send_file fails() with filehandle, sending $content back as txt file now: $filename .= '.txt' unless $filename =~ m!\.txt\Z!; # p $filename; send_file( \$content, content_type => 'text/plain', filename => $filename ); } =begin # filehandle method - fails in D2 0.161: { my $file = $db_tbl . '.txt'; my $zip = Archive::Zip->new(); my $string_member = $zip->addString( $content, $file ); $string_member->desiredCompressionMethod( COMPRESSION_DEFLATED ); my $ref; # scalar ref to convert to filehandle to return via send_file() { # writeToFileHandle: my $fh = new IO::Scalar \$ref; $zip->writeToFileHandle($fh) == AZ_OK or die 'cannot writeToFileHandle()'; } my $filename = join '_', DateTime->today->ymd(''), $db_tbl, param('file'); $filename .= '.zip' unless $filename =~ m!\.zip\Z!; # append .zip unless already # to write file to disk - can't delete after send_file() though: # unless ( $zip->writeToFileNamed('/tmp/'.$filename) == AZ_OK ) { # die 'writeToFileNamed error'; # } # send_file('/tmp/'.$filename, system_path => 1); my @args = ( content_type => 'application/x-gzip', filename => $filename ); return send_file( \$ref, @args ); } =cut }; #------------------------------------------------------------------------------- get '/gitlog' => sub { my @args = ( '--pretty=format:"%ad :: %s"', '--date=relative' ); my @revs = REPO->command('log', @args ); template 'gitlog', { log => \@revs }; }; #------------------------------------------------------------------------------- sub _parse { my $args = shift; # warn dump $args->{form_opts}; my $data_file = $args->{data_file}; # warn dump $data_file; my $filename = $data_file->{filename}; # warn $filename; { # add config to $args: # add session id to config: set session_id => session->id; # warn config->{session_id}; # add running environment for NGS::VEP (unless already set in Test::Common): set environment => dancer_app->environment unless config->{environment}; $args->{config} = config; # p config; } my $vep = NGS::VEP->new($args); # permitted file suffixes: my @exts = qw(.txt .zip .vcf); my ($dir, $name, $ext) = fileparse($filename, @exts); # warn dump [$dir, $name, $ext]; given ($ext) { # single vcf file: when ('.vcf') { # say "$filename is a vcf file"; # create file in /tmp for compatibility with .zip: my $f = File::Spec->catfile('/tmp', $filename); # p $f; $data_file->content > io($f); my %h = ( # filename => $filename, content => $data_file->content, # needs much work in VEP outdir => '/tmp', files => [ $f ], ); # warn dump \%h; return $vep->miseq_vcf(\%h); } # zipped vcf file or directory of vcf files: when ('.zip') { # say "$filename is a zip file"; # get list of files and extraction path of archive: my $ref = _unzip_file($data_file); # p $ref; return $vep->miseq_vcf($ref); } # roche 454 text file: when ('.txt') { # say "$filename is a text file"; return $vep->roche_454; } default { # return unknown filetype message to start page: my ($suffix) = $filename =~ /\.(\w+)\Z/; my $str = sprintf q!"%s" is an unknown file type (not txt, vcf or zip)!, $suffix; # 'err' available to tt as param.err forward '/vep', { err => $str }, { method => 'GET' }; } } } #------------------------------------------------------------------------------- sub _validate_user_input { my $validation_report = shift; if ( defined $validation_report->{data_file} ) { debug 'At least there is a file uploaded.'; #------ Check that the file is a valid data file $validation_report->{upload_error} = 'Not a valid data file!' unless _validate_file( $validation_report->{data_file} ); } else { $validation_report->{upload_error} = 'No data file uploaded!'; } } #------------------------------------------------------------------------------- sub _validate_file { my $in_file = shift; # warn dump $in_file; my $cfg = config->{InputFile}; # warn dump $cfg; my $max_file_size = $cfg->{max_file_size} * 1024 * 1024; # MB my $file_suffix = $cfg->{file_suffix}; return ( $in_file && ( lc $in_file->basename =~ /$file_suffix\Z/ ) && ( $in_file->size <= $max_file_size ) ); } #------------------------------------------------------------------------------- sub _unzip_file { my $data_file = shift; # warn dump $data_file; my $tmp_file = $data_file->tempname; # warn $tmp_file; # my $filename = $data_file->filename; # warn $filename; # don't need it my $ae = Archive::Extract->new( archive => $tmp_file ); # warn $ae->type; my $ok = $ae->extract( to => '/tmp' ); # warn dump $ok; if ($ok) { my $f = $ae->files; # arrayref of zip filenames, relative to unzip location my @files = map { File::Spec->catfile('/tmp', $_) } @$f; # make absolute # p $f; p @files; my %h = ( outdir => $ae->extract_path, files => \@files, ); # warn dump \%h; io($tmp_file)->unlink; # delete zip; extracted dir deleted later return \%h; } else { error $ae->error } } #------------------------------------------------------------------------------- sub _sort_by_labno { my ($y1, $r1) = split '_', $a; # warn Dumper [$r1, $y1]; my ($y2, $r2) = split '_', $b; # warn Dumper [$r2, $y2]; return $y1 <=> $y2 || $r1 <=> $r2; } #------------------------------------------------------------------------------- sub _sort_by_exon { return $a cmp $b } #------------------------------------------------------------------------------- sub _app_version { REPO->command_oneline('rev-list', 'HEAD', '--count') } true;