RSS Git Download  Clone
Raw Blame History
package NGS::DB;

use Moo;
use IO::All;
use Try::Tiny;
use Path::Tiny;
use Data::Dumper;
use Data::Printer;
use FindBin qw($RealBin); # warn $RealBin;
use Time::HiRes qw(gettimeofday tv_interval);

use SQL::Abstract;
use SQL::Abstract::Plugin::InsertMulti;

use lib '/home/raj/perl-lib';
use Local::MooX::Types qw(String HashReference);
use Local::DB;


has session_id => ( is => 'rw', isa => String ); # optional, eg data for download 
has ref_libs   => ( is => 'lazy' ); # SQLite db
has vep_db     => ( is => 'lazy' ); # MySQL ngs db
has error      => ( is => 'rw', isa => HashReference, default => sub { {} } );
has debug      => ( is => 'rw', required => 0 ); # passed in VEP::save_data() only

sub known_locations {
	my $self = shift;	
	my $dbix = $self->ref_libs;
    my $query = $dbix->select('locations', ['position', 'action']);
    
    my %h;
    while ( $query->into( my ($position, $action) ) ) {
        $h{$action}{$position} = 1;
    } # p %h;
    return \%h;	
}

sub transcripts {
	my $self = shift;
	my $dbix = $self->ref_libs;
    my $map = $dbix->select('transcripts', ['gene', 'feature'])->map;
    return $map;
}

sub ref_table {
    my $self = shift;
	my $dbix = $self->ref_libs;
    my $ref_tbl = $dbix->select('ref_seq', '*')->map_hashes('gene'); # p $ref_tbl;
    return $ref_tbl;
}

sub update_vep_table {
    my ($self, $tbl, $data) = @_; # str, AoH
        # $self->_debug($data); 
    my $dbix = $self->vep_db; # p $data;
	# delete any already there:
    $dbix->delete($tbl, { id => $self->session_id }); # warn $self->session_id;
	
    my $cols = $self->get_cols($tbl);
    my $vars = join ',', grep { $_ ne 'timestamp' } @$cols;	

    my $t0 = [gettimeofday]; # set t0
  
=begin # usual dbix methods way too slow (eg for 1100 row vep input):
    $dbix->insert($tbl, $_) for @$data; # 35 secs
    my $sql = "insert into $tbl($vars) values (??)";
    $dbix->query($sql, @{$_}{@$cols}) for @$data; 50 secs
=cut

    my $sql = SQL::Abstract->new; # allows AoA @all_data insert - much faster
    
    my @all_data = map { [ @{$_}{@$cols} ] } @$data; # p @all_data; AoA
    # insert_multi() much faster than a looping query() or insert(): 
    my ($stmt, @bind) = $sql->insert_multi($tbl, $cols, \@all_data); # p $stmt; # p @bind;

	  try { $dbix->query($stmt, @bind) }
	catch { # p $tbl; p $_; # error caught in $_
        my %h = ( $tbl => "error updating $tbl table - " . $dbix->error );
		$self->error(\%h); # p $dbix->error; 
	}; # p $self->error;
	return 0 if $self->error->{$tbl}; # can't return from inside try/catch block
    
    # my $runtime = tv_interval $t0, [gettimeofday]; # p $runtime; # about 1100/sec
    return 1;
}

sub get_vep_data {
	my ($self, $tbl) = @_;
	
	my $dbix = $self->vep_db;
	my $session_id = $self->session_id;
	
	my $all_cols = $self->get_cols($tbl); # p $all_cols;
	my @cols = grep { $_ !~ m!\A(timestamp|id)\Z! } @$all_cols; # p @cols;
	
	my @args = ({ id => $session_id }); # where clause
	# order-by if sample_data:
	push @args, [ qw/year request_number exon_ref/ ] if $tbl eq 'sample_data';
	
	my @data = $dbix->select($tbl, \@cols, @args)->arrays;	
    { # add header row:
        my $headers = $self->get_headers($tbl); # p $headers;
        unshift @data, $headers; # p $data;
    }
	return \@data;
}

sub get_cols {
    my ($self, $tbl) = @_;
	my $dbix = $self->vep_db;
	
	no warnings 'uninitialized'; # non-timestamp col has no default val
	my @cols = map $_->{field}, grep { $_->{default} !~ /timestamp/i } 
		$dbix->query('show columns from '.$tbl)->hashes;
	return \@cols;
}

sub get_headers {
    my ($self, $tbl) = @_;
    
    my $cols = $self->get_cols($tbl);
    my @headers = grep { $_ !~ m!\A(timestamp|id)\Z! } @$cols;
    return \@headers;
}

#-------------------------------------------------------------------------------
sub _build_ref_libs { # SQLite db
    my $db = path($RealBin, '..', 'ngs.sqlite')->realpath; # warn 'DB:'. $db;
	my $dsn = "dbi:SQLite:dbname=$db";

	my $o = Local::DB->dbix({ dsn => $dsn }); # p $o;
    return $o;
}

sub _build_vep_db { Local::DB->dbix('ngs') } # MySQL

sub _debug {
    my ($self, $data) = @_;
    my $fh = $self->debug; print $fh p $data;
}

1;