RSS Git Download  Clone
Raw Blame History
package LIMS::Local::QueryLog;

#===============================================================================
# see logs/README.txt for file permission settings
#===============================================================================

{ # mst fix for failure to re-chmod files after rotation in absence of server restart:
    package Log::Dispatch::File;
    use Class::Method::Modifiers;
    after log_message => sub { delete $_[0]->{chmodded} } # safe to retain after LDF fixed
}

# TODO: see __PACKAGE__ensure_warn_handler_capture if using Apache FastCgiServer

use strict;
use feature 'say';

use Local::DB;
use Try::Tiny;
use SQL::Abstract::Tree; # for pretty-formatting queries
use Data::Printer alias => 'p';

use Readonly;
Readonly::Array my @QUERY_STATEMENTS => qw( SELECT INSERT UPDATE DELETE SHOW );
Readonly::Array my @ENV_OK => qw( DEVEL_SERVER RDBO_DEBUG_ON );

our $SKIP_LOG_QUERY; # allow specific queries to be exempted from log files (eg user msgs)

$|++;

BEGIN {
    # declare var names to make available to _warn():
    use vars qw($LogArgs $cfg $dispatcher $path_to_app $mode $db);
    use POSIX qw(strftime);

    use IO::All;
    use LIMS::DB; # only required by format_query()
    use Data::Printer;
    use LIMS::Local::Utils;
    use LIMS::Local::Config;

    use Log::Dispatch;
    use Log::Dispatch::File;

    use Sub::Exporter -setup =>
        { exports => [ qw(set_querylog_args clear_querylog_args) ] };
    # create new Log::Dispatch::File object (needs to be inside BEGIN block):
    my $new_ldf = sub { Log::Dispatch::File->new(@_) };

    $db  = LIMS::DB->new_or_cached; # only required by format_query()
    $cfg = LIMS::Local::Config->instance;
    $path_to_app = $cfg->{path_to_app_root};

    $dispatcher = Log::Dispatch->new;

    # are we running under fastcgi (deployment) or lims_server (development) mode?
    $mode = $ENV{ROSEDB_DEVINIT} =~ /devinit_prod/
        ? 'deployment' : 'development';

    { # debug level log for SELECT/SHOW queries (fixed filename, rotated file):
        # deployment file needs to be cron-user-writable to allow script to rotate
        # (if www-data:www-data then it needs to be 666):
        my $permission = ( $mode eq 'deployment' ) ? 0666 : 0640;
        my %args = (
            name => 'queries',
            mode => 'append', # happens anyway if close_after_write set to 1
            filename  => "${path_to_app}/logs/${mode}.sql",
            min_level => 'debug',
            permissions => $permission,
            # required to force file to be created after rotation, forces append mode:
            close_after_write => ( $mode eq 'deployment' ), # not required otherwise
        );
        my $o = &$new_ldf(%args);
        $dispatcher->add($o);
    }

    { # info level log for UPDATE/INSERT/DELETE statements (deployment only)
        # production system = archived date-stamped file
        my %args = (
            name => 'edits',
            mode => 'append', # happens anyway with close_after_write set to 1
            filename  => "${path_to_app}/logs/today.sql",
            min_level => 'info',
            # needs to be cron-user-readable to allow script to archive:
            permissions => 0644, # 640 or if www-data:www-data then it needs to be 644
            # required to force file to be created after rotation, forces append mode:
            close_after_write => 1,
        );
        my $o = &$new_ldf(%args);
        $dispatcher->add($o);
    }

    $SIG{__WARN__} = \&_warn_handler;
}


sub set_querylog_args {
    $LogArgs = shift;
#    $LogArgs{username} = $username;
}

sub clear_log_args {
#    %LogArgs = ();
}

sub substitute_placeholders { # p $_[0];
    chomp( my $str = shift ); # p $str; # remove any trailing new-line (eg haem data)
    # remove & capture vars at end of string, matching across multiple lines
    # queries either have "bind params: foo, bar, etc", or "(foo, bar, etc)"
=begin # 2nd regex doesn't work for parens-within-parens, have to use a sub
    $str =~ s/(- bind params: (.*))//s; # bind params captured in $2
    $str =~ s/\(([^(]*)\)$//s; # bind params captured in $1;
=cut
    my ($query, $params) = extract_bind_params($str); # p $query; p $params;
    # RDBO hacked to use \0 to delimit placeholders (also Local::DBIxSimple):
    my @bind = split "\0", $params; # p \@bind; # split /,\s*/, $params;

    # expand omniholders if present (?? -> ?,?,?...) - pass query as ref so
    # changes persist:
    Local::DB::expand_omniholder($db, \$query, \@bind); # p $query;
  # uncomment this to fill in placeholders by SQL::Abstract::Tree:
  # return $query, @bind; # ? doesn't work for text file

    { # append undef's to end of @bind to match no. of placeholders for trailing
      # cols with db default vals which are not supplied in query eg created_at,
      # updated_at, etc; will be converted to 'NULL' later:
        my $n = () = $query =~ /\Q?/g; # warn $n; warn scalar @bind;
        push @bind, undef for ( 1 .. $n - scalar @bind ); # p \@bind;
    }
    # successively replace each '?' in query with next element of @bind array:
    for my $val (@bind) { # warn '=' x 20; p "val:$val; length:". length $val;
        if ( length $val ) { # "defined $val" doesn't work here
            # quote anything other than numbers
            unless ( Scalar::Util::looks_like_number($val) ) {
                # temporarily replace any '?' in $val (confuses the placeholder substitution):
                $val =~ s/\Q?/%%question_mark%%/g; # eg '? myeloproliferative disorder'
                $val = $db->dbh->quote($val);
            }
        }
        else { $val = 'NULL' } # warn $val; # p $query;
        # do placeholder substitution:
        my $ok = $query =~ s/\Q?/$val/; # warn $ok;
    } # p $query; warn '=' x 20;
    $query =~ s/= NULL\b/IS NULL/g; # if query contained '= ?' for an empty placeholder
    # replace temporary question-mark symbols:
    $query =~ s/%%question_mark%%/?/g;
        # warn $query;
    return $query;
}

sub format_query {
    my ($type, $str, @bind) = @_; # console or console_monochrome (for files)
=begin # done by SQL::Abstract::Tree now
    $str =~ s/(FROM|WHERE|ORDER BY|GROUP BY)/\n$1/g;
    $str =~ s/((?<!(OUTER|INNER) )JOIN)/\n\t$1/g;
    my $lh_parens = "\Q("; # p $lh_parens;
    my $rh_parens = "\Q)";
    $str =~ s/(OR $lh_parens)/\n\t$1/g; # p $str;
    # remove blank lines (including just spaces):
    my $query = join "\n", grep { $_ !~ /^\s*$/ } split "\n", $str; # p $query;
    return $query;
=cut
    # my %args = ( fill_in_placeholders => 0/1, profile => $type ); # if needed
    my $formatter = SQL::Abstract::Tree->new({ profile => $type });
    # customise:
    $formatter->indentmap->{from} = 0; # don't indent 'FROM'
    # $formatter->indentmap->{'left outer join'} = 1; # indents but doesn't
    # colorise - delete 'OUTER' instead:
    $str =~ s/(LEFT) OUTER (JOIN)/$1 $2/g; # for colour coding
    delete $formatter->indentmap->{on}; # don't insert new-line before 'ON'
    { # hide some characters which confuse SQLAT:
        $str =~ s/\Q?/%%question_mark%%/g; # eg '? myeloproliferative disorder'
        $str =~ s/\Q(/%%L_paren%%/g; # eg 'Follow-up CML (PB)'
        $str =~ s/\Q)/%%R_paren%%/g; # eg 'Follow-up CML (PB)'
    }
    # protect SQLAT from broken queries, return undef on error (so caller uses
    # original $warn instead); @bind only needed if fill_in_placeholders() required:
    my $query = try { $formatter->format($str, \@bind) } || return undef; # p $query;
    { # reverse hidden chars:
        $query =~ s/%%question_mark%%/?/g;
        $query =~ s/%%L_paren%%/(/g;
        $query =~ s/%%R_paren%%/)/g;
    }
    return $query;
}

# need to call this from app per-request (eg in cgiapp_prerun) if using fastcgi
# in non-external mode - CGI::Fast calls FCGI which re-defines $SIG{__WARN__};
# doesn't cause the same problem when app run as FastCgiExternalServer process
sub ensure_warn_handler_capture { $SIG{__WARN__} = \&_warn_handler; }

# queries either have "- bind params: foo, bar, etc", or "(foo, bar, etc)"
sub extract_bind_params {
    my $string = shift; # warn $string;

    # easiest is "- bind params: foo, bar, etc"
    if ( $string =~ s/(- bind params: (.*))//s ) { # bind params captured in $2
        return ($string, $2);
    }
    # otherwise have to be able to deal with parentheses-within-parentheses
    # eg "(Follow-up CML (PB))":
    my $str_length = length $string; # say $string_length;

    my $gnirts = reverse $string; # warn $gnirts;
    my $start  = my $end = my $i = 0; # set all at zero
    CHAR: # working backwards from original string, eg:
        # (4)                     )4(
        # (2,doi)                 )iod,2(
        # (Follow-up CML (PB))    ))BP( LMC pu-wolloF(
    while ( $gnirts =~ m!( [()] )!gx ) { # capture left-parens & right-parens
        if ( $1 eq ')' ) { # warn "\): \$i:$i; pos: " . pos($gnirts);
            # set end position in original string, ignore any further ')' instances:
            $end = $str_length - pos($gnirts) unless $i;
            $i++; # increment parens counter for each ')'
        }
        elsif ( $1 eq '(' ) { # warn "\(: \$i:$i; pos: " . pos($gnirts);
            $i--; # decrement parens counter for each '('
            # set start position in original string, unless another opened
            # parens remains unclosed (parens counter will still be true):
            next CHAR if $i;
            $start = $str_length - pos($gnirts);
        }
        # finished if we've reached counterpart to initial ')' in reversed string:
        last CHAR if ! $i;
    } # say "start: $start"; say "end: $end";

    # calculate length of params component of string:
    my $params_length = $end - $start - 1; # say "params_length: $params_length";
    # extract params component of string:
    my $params = substr($string, $start + 1, $params_length); # say $params;
    # my $ok = $original_string =~ s/\($params\)//; warn $ok; # can't cope with parens-within-parens
    # remove captured params component from string:
    my $query = substr($string, 0, $start); # say $query;
    return ($query, $params);
}

sub _warn_handler {
    my $warn = shift || return; # warn $SKIP_LOG_QUERY;

    my $sql_statement = grep { $warn =~ /\A$_/ } @QUERY_STATEMENTS;
    my $permitted_env = grep $ENV{$_}, @ENV_OK;

    # nothing required if it's an sql statement & skip logging configured, or
    # RDBO 'Making method ...' and not a permitted ENV:
    return 0 if ( $sql_statement && $SKIP_LOG_QUERY )
        || ( $warn =~ /^Making method/ && not $permitted_env );

    my $time_now = LIMS::Local::Utils::date_and_time_now;

    # write queries to log file:
    if ( $sql_statement ) {
        my ($query, @bind) = substitute_placeholders($warn);
        my $divider = '-+-' x 20;
        # $warn =~ s/\s{2}(JOIN)/\n\t$1/g; # line-break & tab JOIN's
        my $msg = sub { # reading pid id & size kills print run (timeout):
            # sprintf "[%s] %s [%s|%s|%s]\n%s\n", # [time] user [centre, pid ID & size] query:
            #    $time_now, uc $LogArgs->{user}, uc $cfg->{settings}{_centre},
            #    $$, format_number(`ps -p $$ -o size=`+0), $warn;
            sprintf "[%s] %s [%s]\n%s\n/* $divider */\n", # [time] user [centre] query:
                $time_now,
                uc ( $LogArgs->{user} || 'unknown' ),
                uc $cfg->{settings}{_centre},
                # using eval to protect formatter, returns undef on error, so just use $warn:
                format_query('console_monochrome', $query, @bind) || $warn; # monochrome for file
        };
        # deployment db edits go to 'info' level log for archiving:
        if ( $mode eq 'deployment' && $warn =~ /^(INSERT|UPDATE|DELETE)/ ) {
            $dispatcher->log(level => 'info', message => &$msg); # warn 'here';
        }
        $dispatcher->log(level => 'debug', message => &$msg);

        # dump colorised query to stdout if one of permitted envs:
        if ( $permitted_env ) {
            my $stdout = sprintf "%s\n%s", # coloured for console:
                # using eval to protect formatter, returns undef on error, so just use $warn:
                format_query('console', $query, @bind) || $warn, $divider;
            CORE::warn($stdout, "\n"); # \n suppresses "at line #"
        }
    }
    else { CORE::warn($warn) } # warn everything else

=begin # uncomment to dump process size info:
    # my $process_size = `ps -p $$ -o size=`+0;  # moved to 'else' Nov/2016
    if ( $ENV{FAST_CGI} ) {
        my $logfile = $path_to_app . '/logs/processes/' . $$ . '.csv'; # warn $logfile;
        # my $line = join ',', $time_now, $process_size;
        my $line = join ',', $time_now, 'warn:'.$warn; # eg will get here for script errs
        # io($logfile)->append($line, "\n"); # stopped logging Nov/2016
    }
    else {
        my $process_size = `ps -p $$ -o size=`+0;
        CORE::warn sprintf "* process-size=%s\n", $process_size;
    }
=cut
}

=begin # FileRotate causes serious performance hit here
    $dispatcher->add(
        Log::Dispatch::FileRotate->new(
            name        => 'query',
            min_level   => 'debug',
            filename    => "$path_to_app_root/logs/sql.txt",
            permissions => 0666,
            mode        => 'append',
            DatePattern => 'yyyy-MM-dd', # every day # or size => 1, # operates in date or size mode
        )
    );
=cut

1;