package NGS::DB;
use Moo;
use IO::All;
use Try::Tiny (); # so try() doesn't conflict with Log::Report
use Path::Tiny;
use Log::Report;
use Data::Dumper;
use Data::Printer;
use FindBin qw($RealBin); # warn $RealBin;
use Time::HiRes qw(gettimeofday tv_interval);
use SQL::Abstract;
use SQL::Abstract::Plugin::InsertMulti;
use lib '/home/raj/perl-lib';
use Local::MooX::Types qw(String HashReference);
use Local::DB;
# switch off auto-date-inflation until all apps date handling updated:
$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1;
has session_id => ( is => 'rw', isa => String ); # optional, eg data for download
has ref_libs => ( is => 'lazy' ); # SQLite filesystem db
has test_db => ( is => 'lazy' ); # SQLite in-memory db
has app_env => ( is => 'rw', isa => String ); # optional, eg for saving data
has vep_db => ( is => 'lazy' ); # MySQL ngs db
has errors => ( is => 'rw', isa => HashReference, default => sub { {} } );
has debug => ( is => 'rw', required => 0 ); # passed in VEP::save_data() only
sub known_locations {
my $self = shift;
my $dbix = $self->ref_libs;
my $query = $dbix->select('locations', ['position', 'action']);
my %h;
while ( $query->into( my ($position, $action) ) ) {
$h{$action}{$position} = 1;
} # p %h;
return \%h;
}
sub transcripts {
my $self = shift;
my $dbix = $self->ref_libs;
my $map = $dbix->select('transcripts', ['gene', 'feature'])->map;
return $map;
}
sub ref_table {
my $self = shift;
my $dbix = $self->ref_libs;
my $ref_tbl = $dbix->select('ref_seq', '*')->map_hashes('gene'); # p $ref_tbl;
return $ref_tbl;
}
sub update_vep_table {
my ($self, $tbl, $data) = @_; # str, AoH
# $self->_debug($data); # p $data;
my $dbix = ( $self->app_env eq 'test' )
? $self->test_db
: $self->vep_db;
# delete any already there:
$dbix->delete($tbl, { id => $self->session_id }); # warn $self->session_id;
my $cols = $self->get_cols($tbl);
my $vars = join ',', grep { $_ ne 'timestamp' } @$cols;
my $t0 = [gettimeofday]; # set t0
=begin # usual dbix methods way too slow (eg for 1100 row vep input):
$dbix->insert($tbl, $_) for @$data; # 35 secs
my $sql = "insert into $tbl($vars) values (??)";
$dbix->query($sql, @{$_}{@$cols}) for @$data; 50 secs
=cut
my $sql = SQL::Abstract->new; # allows AoA @all_data insert - much faster
my @all_data = map { [ @{$_}{@$cols} ] } @$data; # p @all_data; # AoA
# insert_multi() much faster than a looping query() or insert():
my ($stmt, @bind) = $sql->insert_multi($tbl, $cols, \@all_data); # p $stmt; # p @bind;
Try::Tiny::try { $dbix->query($stmt, @bind) }
Try::Tiny::catch { # p $tbl; p $_; # error caught in $_
my %h = ( $tbl => "error updating $tbl table - " . $dbix->error );
$self->errors(\%h); # p $dbix->error;
}; # p $self->error;
return 0 if $self->errors->{$tbl}; # can't return from inside try/catch block
# my $runtime = tv_interval $t0, [gettimeofday]; # p $runtime; # about 1100/sec
return 1;
}
sub get_vep_data {
my ($self, $tbl) = @_;
my $dbix = $self->vep_db;
my $session_id = $self->session_id;
my $all_cols = $self->get_cols($tbl); # p $all_cols;
my @cols = grep { $_ !~ m!\A(timestamp|id)\Z! } @$all_cols; # p @cols;
my @args = ({ id => $session_id }); # where clause
# order-by if sample_data:
push @args, [ qw/year request_number exon_ref/ ] if $tbl eq 'sample_data';
my @data = $dbix->select($tbl, \@cols, @args)->arrays;
{ # add header row:
my $headers = $self->get_headers($tbl); # p $headers;
unshift @data, $headers; # p $data;
}
return \@data;
}
sub get_cols {
my ($self, $tbl) = @_;
my $dbix = $self->vep_db;
my @tbl_data = $dbix->query('show columns from '.$tbl)->hashes; # p @tbl_data;
no warnings 'uninitialized'; # non-timestamp col has no default val
my @cols = map $_->{field}, grep { $_->{default} !~ /timestamp/i } @tbl_data;
return \@cols;
}
# get column length definitions (for char & vachar cols):
sub get_col_defs {
my ($self, $tbl) = @_;
my $dbix = $self->vep_db;
my %h;
my @tbl_data = $dbix->query('show columns from '.$tbl)->hashes; # p @tbl_data;
COL:
for my $col(@tbl_data) {
my $field = $col->{field};
my $type = $col->{type}; # p $type;
my ($max_size) = $type =~ m!\((\d+)\)! or next COL; # don't use || here !!
$h{$field} = $max_size;
} # p %h;
return \%h;
}
sub get_headers {
my ($self, $tbl) = @_;
my $cols = $self->get_cols($tbl);
my @headers = grep { $_ !~ m!\A(timestamp|id)\Z! } @$cols;
return \@headers;
}
#-------------------------------------------------------------------------------
sub _build_ref_libs { # SQLite db
my $db = path($RealBin, '..', 'ngs.sqlite')->realpath; # warn 'DB:'. $db;
my $dsn = "dbi:SQLite:dbname=$db";
my $o = Local::DB->dbix({ dsn => $dsn }); # p $o;
return $o;
}
sub _build_vep_db { Local::DB->dbix({ dbname => 'ngs' }) } # MySQL
sub _build_test_db {
my $dbix = Local::DB->dbix({ dsn => 'dbi:SQLite:dbname=:memory:' }); # p $dbix;
my $schema = _schema(); # p @schema;
do { $dbix->dbh->do($_) || die $dbix->db->err_str } for @$schema; # $dbix->error doesn't work here
return $dbix;
}
sub _debug {
my ($self, $data) = @_;
my $fh = $self->debug; print $fh p $data;
}
sub _schema { # read ngs_setup.sql & convert to sqlite compatible:
my $src = io($RealBin . '/../ngs_setup.sql')->slurp; # p $src;
# strip mysql backticks & COMMENT entries:
$src =~ s/`//g;
$src =~ s/COMMENT=.*//g; # warn $src;
my @tables = split /ENGINE=InnoDB/, $src; # warn $_ for @tables;
return [ grep /CREATE TABLE/, @tables ]; # last array item empty
}
1;