package LIMS::Local::QueryLog; #=============================================================================== # see logs/README.txt for file permission settings #=============================================================================== { # mst fix for failure to re-chmod files after rotation in absence of server restart: package Log::Dispatch::File; use Class::Method::Modifiers; after log_message => sub { delete $_[0]->{chmodded} } # safe to retain after LDF fixed } # TODO: see __PACKAGE__ensure_warn_handler_capture if using Apache FastCgiServer use strict; use feature 'say'; use Local::DB; use Try::Tiny; use SQL::Abstract::Tree; # for pretty-formatting queries use Data::Printer alias => 'p'; use Readonly; Readonly::Array my @QUERY_STATEMENTS => qw( SELECT INSERT UPDATE DELETE SHOW ); Readonly::Array my @ENV_OK => qw( DEVEL_SERVER RDBO_DEBUG_ON ); our $SKIP_LOG_QUERY; # allow specific queries to be exempted from log files (eg user msgs) $|++; BEGIN { # declare var names to make available to _warn(): use vars qw($LogArgs $cfg $dispatcher $path_to_app $mode $db); use POSIX qw(strftime); use IO::All; use LIMS::DB; # only required by format_query() use Data::Printer; use LIMS::Local::Utils; use LIMS::Local::Config; use Log::Dispatch; use Log::Dispatch::File; use Sub::Exporter -setup => { exports => [ qw(set_querylog_args clear_querylog_args) ] }; # create new Log::Dispatch::File object (needs to be inside BEGIN block): my $new_ldf = sub { Log::Dispatch::File->new(@_) }; $db = LIMS::DB->new_or_cached; # only required by format_query() $cfg = LIMS::Local::Config->instance; $path_to_app = $cfg->{path_to_app_root}; $dispatcher = Log::Dispatch->new; # are we running under fastcgi (deployment) or lims_server (development) mode? $mode = $ENV{ROSEDB_DEVINIT} =~ /devinit_prod/ ? 'deployment' : 'development'; { # debug level log for SELECT/SHOW queries (fixed filename, rotated file): # deployment file needs to be cron-user-writable to allow script to rotate # (if www-data:www-data then it needs to be 666): my $permission = ( $mode eq 'deployment' ) ? 0666 : 0640; my %args = ( name => 'queries', mode => 'append', # happens anyway if close_after_write set to 1 filename => "${path_to_app}/logs/${mode}.sql", min_level => 'debug', permissions => $permission, # required to force file to be created after rotation, forces append mode: close_after_write => ( $mode eq 'deployment' ), # not required otherwise ); my $o = &$new_ldf(%args); $dispatcher->add($o); } { # info level log for UPDATE/INSERT/DELETE statements (deployment only) # production system = archived date-stamped file my %args = ( name => 'edits', mode => 'append', # happens anyway with close_after_write set to 1 filename => "${path_to_app}/logs/today.sql", min_level => 'info', # needs to be cron-user-readable to allow script to archive: permissions => 0644, # 640 or if www-data:www-data then it needs to be 644 # required to force file to be created after rotation, forces append mode: close_after_write => 1, ); my $o = &$new_ldf(%args); $dispatcher->add($o); } $SIG{__WARN__} = \&_warn_handler; } sub set_querylog_args { $LogArgs = shift; # $LogArgs{username} = $username; } sub clear_log_args { # %LogArgs = (); } sub substitute_placeholders { # p $_[0]; chomp( my $str = shift ); # p $str; # remove any trailing new-line (eg haem data) # remove & capture vars at end of string, matching across multiple lines # queries either have "bind params: foo, bar, etc", or "(foo, bar, etc)" =begin # 2nd regex doesn't work for parens-within-parens, have to use a sub $str =~ s/(- bind params: (.*))//s; # bind params captured in $2 $str =~ s/\(([^(]*)\)$//s; # bind params captured in $1; =cut my ($query, $params) = extract_bind_params($str); # p $query; p $params; # RDBO hacked to use \0 to delimit placeholders (also Local::DBIxSimple): my @bind = split "\0", $params; # p \@bind; # split /,\s*/, $params; # expand omniholders if present (?? -> ?,?,?...) - pass query as ref so # changes persist: Local::DB::expand_omniholder($db, \$query, \@bind); # p $query; # uncomment this to fill in placeholders by SQL::Abstract::Tree: # return $query, @bind; # ? doesn't work for text file { # append undef's to end of @bind to match no. of placeholders for trailing # cols with db default vals which are not supplied in query eg created_at, # updated_at, etc; will be converted to 'NULL' later: my $n = () = $query =~ /\Q?/g; # warn $n; warn scalar @bind; push @bind, undef for ( 1 .. $n - scalar @bind ); # p \@bind; } # successively replace each '?' in query with next element of @bind array: for my $val (@bind) { # warn '=' x 20; p "val:$val; length:". length $val; if ( length $val ) { # "defined $val" doesn't work here # quote anything other than numbers unless ( Scalar::Util::looks_like_number($val) ) { # temporarily replace any '?' in $val (confuses the placeholder substitution): $val =~ s/\Q?/%%question_mark%%/g; # eg '? myeloproliferative disorder' $val = $db->dbh->quote($val); } } else { $val = 'NULL' } # warn $val; # p $query; # do placeholder substitution: my $ok = $query =~ s/\Q?/$val/; # warn $ok; } # p $query; warn '=' x 20; $query =~ s/= NULL\b/IS NULL/g; # if query contained '= ?' for an empty placeholder # replace temporary question-mark symbols: $query =~ s/%%question_mark%%/?/g; # warn $query; return $query; } sub format_query { my ($type, $str, @bind) = @_; # console or console_monochrome (for files) =begin # done by SQL::Abstract::Tree now $str =~ s/(FROM|WHERE|ORDER BY|GROUP BY)/\n$1/g; $str =~ s/((? 0/1, profile => $type ); # if needed my $formatter = SQL::Abstract::Tree->new({ profile => $type }); # customise: $formatter->indentmap->{from} = 0; # don't indent 'FROM' # $formatter->indentmap->{'left outer join'} = 1; # indents but doesn't # colorise - delete 'OUTER' instead: $str =~ s/(LEFT) OUTER (JOIN)/$1 $2/g; # for colour coding delete $formatter->indentmap->{on}; # don't insert new-line before 'ON' { # hide some characters which confuse SQLAT: $str =~ s/\Q?/%%question_mark%%/g; # eg '? myeloproliferative disorder' $str =~ s/\Q(/%%L_paren%%/g; # eg 'Follow-up CML (PB)' $str =~ s/\Q)/%%R_paren%%/g; # eg 'Follow-up CML (PB)' } # protect SQLAT from broken queries, return undef on error (so caller uses # original $warn instead); @bind only needed if fill_in_placeholders() required: my $query = try { $formatter->format($str, \@bind) } || return undef; # p $query; { # reverse hidden chars: $query =~ s/%%question_mark%%/?/g; $query =~ s/%%L_paren%%/(/g; $query =~ s/%%R_paren%%/)/g; } return $query; } # need to call this from app per-request (eg in cgiapp_prerun) if using fastcgi # in non-external mode - CGI::Fast calls FCGI which re-defines $SIG{__WARN__}; # doesn't cause the same problem when app run as FastCgiExternalServer process sub ensure_warn_handler_capture { $SIG{__WARN__} = \&_warn_handler; } # queries either have "- bind params: foo, bar, etc", or "(foo, bar, etc)" sub extract_bind_params { my $string = shift; # warn $string; # easiest is "- bind params: foo, bar, etc" if ( $string =~ s/(- bind params: (.*))//s ) { # bind params captured in $2 return ($string, $2); } # otherwise have to be able to deal with parentheses-within-parentheses # eg "(Follow-up CML (PB))": my $str_length = length $string; # say $string_length; my $gnirts = reverse $string; # warn $gnirts; my $start = my $end = my $i = 0; # set all at zero CHAR: # working backwards from original string, eg: # (4) )4( # (2,doi) )iod,2( # (Follow-up CML (PB)) ))BP( LMC pu-wolloF( while ( $gnirts =~ m!( [()] )!gx ) { # capture left-parens & right-parens if ( $1 eq ')' ) { # warn "\): \$i:$i; pos: " . pos($gnirts); # set end position in original string, ignore any further ')' instances: $end = $str_length - pos($gnirts) unless $i; $i++; # increment parens counter for each ')' } elsif ( $1 eq '(' ) { # warn "\(: \$i:$i; pos: " . pos($gnirts); $i--; # decrement parens counter for each '(' # set start position in original string, unless another opened # parens remains unclosed (parens counter will still be true): next CHAR if $i; $start = $str_length - pos($gnirts); } # finished if we've reached counterpart to initial ')' in reversed string: last CHAR if ! $i; } # say "start: $start"; say "end: $end"; # calculate length of params component of string: my $params_length = $end - $start - 1; # say "params_length: $params_length"; # extract params component of string: my $params = substr($string, $start + 1, $params_length); # say $params; # my $ok = $original_string =~ s/\($params\)//; warn $ok; # can't cope with parens-within-parens # remove captured params component from string: my $query = substr($string, 0, $start); # say $query; return ($query, $params); } sub _warn_handler { my $warn = shift || return; # warn $SKIP_LOG_QUERY; my $sql_statement = grep { $warn =~ /\A$_/ } @QUERY_STATEMENTS; my $permitted_env = grep $ENV{$_}, @ENV_OK; # nothing required if it's an sql statement & skip logging configured, or # RDBO 'Making method ...' and not a permitted ENV: return 0 if ( $sql_statement && $SKIP_LOG_QUERY ) || ( $warn =~ /^Making method/ && not $permitted_env ); my $time_now = LIMS::Local::Utils::date_and_time_now; # write queries to log file: if ( $sql_statement ) { my ($query, @bind) = substitute_placeholders($warn); my $divider = '-+-' x 20; # $warn =~ s/\s{2}(JOIN)/\n\t$1/g; # line-break & tab JOIN's my $msg = sub { # reading pid id & size kills print run (timeout): # sprintf "[%s] %s [%s|%s|%s]\n%s\n", # [time] user [centre, pid ID & size] query: # $time_now, uc $LogArgs->{user}, uc $cfg->{settings}{_centre}, # $$, format_number(`ps -p $$ -o size=`+0), $warn; sprintf "[%s] %s [%s]\n%s\n/* $divider */\n", # [time] user [centre] query: $time_now, uc ( $LogArgs->{user} || 'unknown' ), uc $cfg->{settings}{_centre}, # using eval to protect formatter, returns undef on error, so just use $warn: format_query('console_monochrome', $query, @bind) || $warn; # monochrome for file }; # deployment db edits go to 'info' level log for archiving: if ( $mode eq 'deployment' && $warn =~ /^(INSERT|UPDATE|DELETE)/ ) { $dispatcher->log(level => 'info', message => &$msg); # warn 'here'; } $dispatcher->log(level => 'debug', message => &$msg); # dump colorised query to stdout if one of permitted envs: if ( $permitted_env ) { my $stdout = sprintf "%s\n%s", # coloured for console: # using eval to protect formatter, returns undef on error, so just use $warn: format_query('console', $query, @bind) || $warn, $divider; CORE::warn($stdout, "\n"); # \n suppresses "at line #" } } else { CORE::warn($warn) } # warn everything else =begin # uncomment to dump process size info: # my $process_size = `ps -p $$ -o size=`+0; # moved to 'else' Nov/2016 if ( $ENV{FAST_CGI} ) { my $logfile = $path_to_app . '/logs/processes/' . $$ . '.csv'; # warn $logfile; # my $line = join ',', $time_now, $process_size; my $line = join ',', $time_now, 'warn:'.$warn; # eg will get here for script errs # io($logfile)->append($line, "\n"); # stopped logging Nov/2016 } else { my $process_size = `ps -p $$ -o size=`+0; CORE::warn sprintf "* process-size=%s\n", $process_size; } =cut } =begin # FileRotate causes serious performance hit here $dispatcher->add( Log::Dispatch::FileRotate->new( name => 'query', min_level => 'debug', filename => "$path_to_app_root/logs/sql.txt", permissions => 0666, mode => 'append', DatePattern => 'yyyy-MM-dd', # every day # or size => 1, # operates in date or size mode ) ); =cut 1;