package NGS::DB; use Moo; use Try::Tiny (); # so try() doesn't conflict with Log::Report use Path::Tiny; # provides slurp() so IO::All unnecessary use Log::Report; use Data::Dumper; use Data::Printer; use FindBin qw($RealBin); # warn $RealBin; use Time::HiRes qw(gettimeofday tv_interval); use SQL::Abstract; use SQL::Abstract::Plugin::InsertMulti; use lib '/home/raj/perl-lib'; use Local::MooX::Types qw(String HashReference); use Local::DB; # switch off auto-date-inflation until all apps date handling updated: $Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; has session_id => ( is => 'rw', isa => String ); # optional, eg data for download has ref_libs => ( is => 'lazy' ); # SQLite filesystem db has test_db => ( is => 'lazy' ); # SQLite in-memory db has app_env => ( is => 'rw', isa => String ); # optional, eg for saving data has vep_db => ( is => 'lazy' ); # MySQL ngs db has errors => ( is => 'rw', isa => HashReference, default => sub { {} } ); has debug => ( is => 'rw', required => 0 ); # passed in VEP::save_data() only sub known_locations { my $self = shift; my $dbix = $self->ref_libs; my $query = $dbix->select('locations', ['position', 'action']); my %h; while ( $query->into( my ($position, $action) ) ) { $h{$action}{$position} = 1; } # p %h; return \%h; } sub transcripts { my $self = shift; my $dbix = $self->ref_libs; my $map = $dbix->select('transcripts', ['gene', 'feature'])->map; return $map; } sub ref_table { my $self = shift; my $dbix = $self->ref_libs; my $ref_tbl = $dbix->select('ref_seq', '*')->map_hashes('gene'); # p $ref_tbl; return $ref_tbl; } sub update_vep_table { my ($self, $tbl, $data) = @_; # str, AoH # $self->_debug($data); # p $data; my $dbix = ( $self->app_env eq 'test' ) ? $self->test_db : $self->vep_db; # delete any already there: $dbix->delete($tbl, { id => $self->session_id }); # warn $self->session_id; my $cols = $self->get_cols($tbl); my $vars = join ',', grep { $_ ne 'timestamp' } @$cols; my $t0 = [gettimeofday]; # set t0 =begin # usual dbix methods way too slow (eg for 1100 row vep input): $dbix->insert($tbl, $_) for @$data; # 35 secs my $sql = "insert into $tbl($vars) values (??)"; $dbix->query($sql, @{$_}{@$cols}) for @$data; 50 secs =cut my $sql = SQL::Abstract->new; # allows AoA @all_data insert - much faster my @all_data = map { [ @{$_}{@$cols} ] } @$data; # p @all_data; # AoA # insert_multi() much faster than a looping query() or insert(): my ($stmt, @bind) = $sql->insert_multi($tbl, $cols, \@all_data); # p $stmt; # p @bind; Try::Tiny::try { $dbix->query($stmt, @bind) } Try::Tiny::catch { # p $tbl; p $_; # error caught in $_ my %h = ( $tbl => "error updating $tbl table - " . $dbix->error ); $self->errors(\%h); # p $dbix->error; }; # p $self->error; return 0 if $self->errors->{$tbl}; # can't return from inside try/catch block # my $runtime = tv_interval $t0, [gettimeofday]; # p $runtime; # about 1100/sec return 1; } sub get_vep_data { my ($self, $tbl) = @_; my $dbix = $self->vep_db; my $session_id = $self->session_id; my $all_cols = $self->get_cols($tbl); # p $all_cols; my @cols = grep { $_ !~ m!\A(timestamp|id)\Z! } @$all_cols; # p @cols; my @args = ({ id => $session_id }); # where clause # order-by if sample_data: push @args, [ qw/year request_number exon_ref/ ] if $tbl eq 'sample_data'; my @data = $dbix->select($tbl, \@cols, @args)->arrays; { # add header row: my $headers = $self->get_headers($tbl); # p $headers; unshift @data, $headers; # p $data; } return \@data; } sub get_cols { my ($self, $tbl) = @_; my $dbix = $self->vep_db; my @tbl_data = $dbix->query('show columns from '.$tbl)->hashes; # p @tbl_data; no warnings 'uninitialized'; # non-timestamp col has no default val my @cols = map $_->{field}, grep { $_->{default} !~ /timestamp/i } @tbl_data; return \@cols; } # get column length definitions (for char & vachar cols): sub get_col_defs { my ($self, $tbl) = @_; my $dbix = $self->vep_db; my %h; my @tbl_data = $dbix->query('show columns from '.$tbl)->hashes; # p @tbl_data; COL: for my $col(@tbl_data) { my $field = $col->{field}; my $type = $col->{type}; # p $type; my ($max_size) = $type =~ m!\((\d+)\)! or next COL; # don't use || here !! $h{$field} = $max_size; } # p %h; return \%h; } sub get_headers { my ($self, $tbl) = @_; my $cols = $self->get_cols($tbl); my @headers = grep { $_ !~ m!\A(timestamp|id)\Z! } @$cols; return \@headers; } #------------------------------------------------------------------------------- sub _build_ref_libs { # SQLite db my $db = path($RealBin, '..', 'ngs.sqlite')->realpath; # warn 'DB:'. $db; my $dsn = "dbi:SQLite:dbname=$db"; my $o = Local::DB->dbix({ dsn => $dsn }); # p $o; return $o; } sub _build_vep_db { Local::DB->dbix({ dbname => 'ngs' }) } # MySQL sub _build_test_db { my $dbix = Local::DB->dbix({ dsn => 'dbi:SQLite:dbname=:memory:' }); # p $dbix; my $schema = _schema(); # p @schema; do { $dbix->dbh->do($_) || die $dbix->db->err_str } for @$schema; # $dbix->error doesn't work here return $dbix; } sub _debug { my ($self, $data) = @_; my $fh = $self->debug; print $fh p $data; } sub _schema { # read ngs_setup.sql & convert to sqlite compatible: my $src = path($RealBin, '..', 'ngs_setup.sql')->slurp; # p $src; # strip mysql backticks & COMMENT entries: $src =~ s/`//g; $src =~ s/COMMENT=.*//g; # warn $src; my @tables = split /ENGINE=InnoDB/, $src; # warn $_ for @tables; return [ grep /CREATE TABLE/, @tables ]; # last array item empty } 1;