RSS Git Download  Clone
Raw Blame History
package NGS;

=begin
handles NGS data input from either Roche 454 .txt file or zipped Illumina MiSeq
vcf files. Pre-process data files to generate an Ensembl Variant Effect Predictor
(vep) input file. Sends vep input file to stand-alone vep script and post-process
output to generate html results table
=cut

# TODO: exons not sorted naturally; data supplied to tt as chr:location key, not exon

# export RDBO_DEBUG=1; bin/app.pl to switch on query debug

use Modern::Perl qw(2012); # 5.14
use autodie;

use FindBin qw($RealBin); # warn $RealBin;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use Archive::Extract; # $Archive::Extract::DEBUG = 1;
use Data::Dump qw(dump);
use File::Basename;
use Carp qw(croak);
use Data::Printer;
use Path::Tiny;
use IO::Scalar;
use Template;
use DateTime;
use IO::All;
use Try::Tiny;
use Git;

use Dancer2;

use NGS::DB;
use NGS::VEP;
use NGS::VEP::BioSphere; # api_version()

# disable sql_trace if test script (needs explicit export SQL_TRACE=1 if req'd):
set enable_sql_trace => 0 if $ENV{HARNESS_VERSION};

# given & when moved to experimental in 5.18
no if $] >= 5.018, warnings => 'experimental::smartmatch';

# --- Globals ------------------------------------------------------------------
# get VEP version from Bio::EnsEMBL::ApiVersion::software_version:
use constant VEP_SRC_FILENAME   => 'data_src'; # form param containing filename
use constant VEP_OUTPUT_TT      => 'vep/result.tt'; # tmpl to handle vep results
use constant VEP_INPUT_TT       => 'vep.tt'; # tmpl contains vep data input form
use constant API_VERSION        => NGS::VEP::BioSphere->new->api_version;

my $repo = path($RealBin, '..', '.git')->realpath; # warn $repo;
use constant REPO => Git->repository( Directory => $repo );
our $VERSION = _app_version(); # warn $VERSION;

hook before => sub {
    var app_version => $VERSION; # NGS app
    var api_version => API_VERSION; # vep version

    # set SQL_TRACE flag if configured:
    $ENV{SQL_TRACE} = setting('enable_sql_trace');
    $Local::QueryLogger::CurrentUsername = session->id;
};

#-------------------------------------------------------------------------------
hook before_template_render => sub {
    my $tokens = shift;

    $tokens->{nsort_labno} = sub {
		my $list = shift || return; # warn dump $list;
		return sort _sort_by_labno(@$list);
	};
    $tokens->{nsort_exon} = sub { # can't do this yet - key = chr:location not exon
		my $list = shift || return; # warn dump $list;
		return sort _sort_by_exon(@$list);
	};
};

#-------------------------------------------------------------------------------
get '/' => sub { template 'index' }; # default index page

#-------------------------------------------------------------------------------
get '/vep' => sub {
    debug 'GET upload page';

    my $bio = NGS::VEP::BioSphere->new;
    my $types = $bio->get_cache_types; # warn dump $types; # core, refseq, etc
    my %dbs = map +($_ => 1), @$types; # cache types available to current api version

    var cache => \%dbs;

    template VEP_INPUT_TT, { title => 'Upload VEP DataFile' };
};

#-------------------------------------------------------------------------------
post '/vep' => sub {
    debug 'POST upload page'; # debug "Params are : " . dump( request->params );

    my %allparams = params; # debug dump \%allparams;

    my $input = {
        form_opts => \%allparams,
        data_file => upload(VEP_SRC_FILENAME) || undef,
    }; # warn dump $input->{data_file};

    _validate_user_input($input); # debug 'Validation results: ' . dump($input);

#-- display any errors
    if ( exists $input->{upload_error} ) { debug 'have upload error';
		$input->{title} = 'Upload Data Error';
		$input->{warning_message} = 'You must upload the file again!';
        return template VEP_INPUT_TT, $input;
    }

#-- passed validation
    debug 'OK passed the error checks.';

    { # send submitted src data to vep script:
		try {
			my $data = _parse($input);
			var results => $data;
		}
		catch { var err_msg => $_ }; # catch any fatals, return a 500.tt:
		return template 500 if var 'err_msg'; # err_msg available to tt in vars
	}
    # delete temp file (if exists; auto-deleted for .t uploads):
    unless ( $input->{data_file}->{temp_fh} ) { # only exists for .t uploads
        if ( my $tmp_file = $input->{data_file}->tempname ) {
            io($tmp_file)->unlink if -e $tmp_file;
        }
    }

    return template VEP_OUTPUT_TT, { title => 'VEP Results' };
};

#-------------------------------------------------------------------------------
get '/download/:tbl/:file' => sub {
	my $db = NGS::DB->new; # p session->id;
	$db->session_id(session->id);

	my $db_tbl = param('tbl'); # p $db_tbl;

	my $data = $db->get_vep_data($db_tbl); # p $data;

	# tab-delimit each col, new-line delimit each row:
    my $content = do{
        no warnings 'uninitialized'; # eg sift & polyphen
        join "\n", map { join "\t", @$_ } @$data;
    }; # p $content;

    my $filename = join '_', DateTime->today->ymd(''), $db_tbl, param('file'); # p $filename;

    { # send_file fails() with filehandle, sending $content back as txt file now:
        $filename .= '.txt' unless $filename =~ m!\.txt\Z!; # p $filename;
        send_file( \$content, content_type => 'text/plain', filename => $filename );
    }
=begin # filehandle method - fails in D2 0.161:
  {
	my $file = $db_tbl . '.txt';
    my $zip  = Archive::Zip->new();

    my $string_member = $zip->addString( $content, $file );
    $string_member->desiredCompressionMethod( COMPRESSION_DEFLATED );

    my $ref; # scalar ref to convert to filehandle to return via send_file()
    { # writeToFileHandle:
        my $fh = new IO::Scalar \$ref;
        $zip->writeToFileHandle($fh) == AZ_OK
            or die 'cannot writeToFileHandle()';
    }

    my $filename = join '_', DateTime->today->ymd(''), $db_tbl, param('file');
    $filename .= '.zip' unless $filename =~ m!\.zip\Z!; # append .zip unless already

    # to write file to disk - can't delete after send_file() though:
    # unless ( $zip->writeToFileNamed('/tmp/'.$filename) == AZ_OK ) {
    #    die 'writeToFileNamed error';
    # }
    # send_file('/tmp/'.$filename, system_path => 1);

    my @args = ( content_type => 'application/x-gzip', filename => $filename );
    return send_file( \$ref, @args );
  }
=cut
};

#-------------------------------------------------------------------------------
get '/gitlog' => sub {
    my @args = ( '--pretty=format:"%ad :: %s"', '--date=relative' );
    my @revs = REPO->command('log', @args );
    template 'gitlog', { log => \@revs };
};

#-------------------------------------------------------------------------------
sub _parse {
    my $args = shift; # warn dump $args->{form_opts};

    my $data_file = $args->{data_file}; # warn dump $data_file;
    my $filename  = $data_file->{filename}; # warn $filename;

    { # add config to $args:
        # add session id to config:
        set session_id => session->id; # warn config->{session_id};
        # add running environment for NGS::VEP (unless already set in Test::Common):
        set environment => dancer_app->environment unless config->{environment};
        $args->{config} = config; # p config;
    }

    my $vep = NGS::VEP->new($args);

    # permitted file suffixes:
    my @exts = qw(.txt .zip .vcf);
    my ($dir, $name, $ext) = fileparse($filename, @exts); # warn dump [$dir, $name, $ext];

    given ($ext) {
        # single vcf file:
        when ('.vcf') { # say "$filename is a vcf file";
            # create file in /tmp for compatibility with .zip:
            my $f = File::Spec->catfile('/tmp', $filename); # warn $f;
            $data_file->content > io($f);

            my %h = (
                # filename => $filename, content => $data_file->content, # needs much work in VEP
                outdir => '/tmp',
                files  => [ $f ],
            ); # warn dump \%h;
            return $vep->miseq_vcf(\%h);
        }
        # zipped vcf file or directory of vcf files:
        when ('.zip') { # say "$filename is a zip file";
            # get list of files and extraction path of archive:
            my $ref = _unzip_file($data_file); # warn dump $ref;
            return $vep->miseq_vcf($ref);
        }
        # roche 454 text file:
        when ('.txt') { # say "$filename is a text file";
            return $vep->roche_454;
        }
        default { # return unknown filetype message to start page:
            my ($suffix) = $filename =~ /\.(\w+)\Z/;
            my $str = sprintf q!"%s" is an unknown file type (not txt, vcf or zip)!,
                $suffix; # 'err' available to tt as param.err
            forward '/vep', { err => $str }, { method => 'GET' };
        }
    }
}

#-------------------------------------------------------------------------------
sub _validate_user_input {
    my $validation_report = shift;

    if ( defined $validation_report->{data_file} ) {
        debug 'At least there is a file uploaded.';

        #------ Check that the file is a valid data file
        $validation_report->{upload_error} = 'Not a valid data file!'
            unless _validate_file( $validation_report->{data_file} );
    }
    else {
        $validation_report->{upload_error} = 'No data file uploaded!';
    }
}

#-------------------------------------------------------------------------------
sub _validate_file {
    my $in_file = shift; # warn dump $in_file;

    my $cfg = config->{InputFile}; # warn dump $cfg;

    my $max_file_size = $cfg->{max_file_size} * 1024 * 1024; # MB
    my $file_suffix   = $cfg->{file_suffix};

    return (
        $in_file
        && ( lc $in_file->basename =~ /$file_suffix\Z/ )
        && ( $in_file->size <= $max_file_size )
    );
}

#-------------------------------------------------------------------------------
sub _unzip_file {
    my $data_file = shift; # warn dump $data_file;

    my $tmp_file = $data_file->tempname; # warn $tmp_file;
#    my $filename = $data_file->filename; # warn $filename; # don't need it

    my $ae = Archive::Extract->new( archive => $tmp_file ); # warn $ae->type;

    my $ok = $ae->extract( to => '/tmp' ); # warn dump $ok;

    if ($ok) {
        my $f = $ae->files; # arrayref of zip filenames, relative to unzip location
        my @files = map { File::Spec->catfile('/tmp', $_) } @$f; # make absolute

        my %h = (
            outdir => $ae->extract_path,
            files  => \@files,
        ); # warn dump \%h;
        io($tmp_file)->unlink; # delete zip; extracted dir deleted later
        return \%h;
    }
    else { die $ae->error }
}

#-------------------------------------------------------------------------------
sub _sort_by_labno {
	my ($y1, $r1) = split '_', $a; # warn Dumper [$r1, $y1];
	my ($y2, $r2) = split '_', $b; # warn Dumper [$r2, $y2];

	return $y1 <=> $y2 || $r1 <=> $r2;
}

#-------------------------------------------------------------------------------
sub _sort_by_exon { return $a cmp $b }

#-------------------------------------------------------------------------------
sub _app_version { REPO->command_oneline('rev-list', 'HEAD', '--count') }

true;