P3X GitList

#!/usr/bin/perl

=begin
validates xml from genomics requests; uses same validation function as genomics_xml.pl
doesn't save xml file, or update any database table, or push to any remote location
=cut

use lib (
    '/home/raj/perl5/lib/perl5',
    '/home/raj/apps/HILIS4/lib',
    '/home/raj/perl-lib', # Local::XMLSimple - patched to escape single-apostrophe
);
use LIMS::Local::ScriptHelpers;
use XML::SAX::ParserFactory;
use XML::Validator::Schema;
use Local::XMLSimple qw(:strict); # need to specify ForceArray & KeyAttr if using :strict here
use Data::Printer alias => 'p', use_prototypes => 0;
use FindBin qw($Bin); # warn $Bin; exit;
use Clone qw(clone);
use Local::Utils;
use Data::Dumper;
use Modern::Perl;
use Path::Tiny;
use Local::DB;
use IO::All;

my $cfg_file = $Bin . '/../script/crons/lib/settings.cfg'; # p $cfg_file;
my $xsd_src  = $Bin . '/../setup/schema/xsd/genomics.xsd'; # p $xsd_src;

my $timestamp = LIMS::Local::Utils::time_now();
my $settings  = Config::Auto::parse($cfg_file); # p $settings; exit;
my $tools     = LIMS::Local::ScriptHelpers->new();
my $dbix      = Local::DB->dbix({ dbname => 'genomics' });

my $genomics_cfg = $settings->{genomics};      # p $genomics_cfg;
my $genomics_uat = $genomics_cfg->{uat};       # p $genomics_uat; exit;
my $sql_lib      = $tools->sql_lib();

# family_id_prefix = 1st 3 digits of rare_disease participant id base:
my ($family_id_prefix) = $genomics_uat->{rare_disease} =~ /^(\d{3})/; # p $family_id_prefix;

# set some package vars (auto-stringify datetime objects & don't log queries):
$Local::QueryLogger::NO_QUERY_LOGS = 1; # don't need queries in logs dir
$Local::DBIx::Simple::Result::STRINGIFY_DATETIME = 1; # as we hand $data straight to XMLout()

my %SQL = (
    demographics => $sql_lib->retr('genomics_demographics'),
    storage      => $sql_lib->retr('genomics_storage'),
    results      => $sql_lib->retr('genomics_results'),
    consent      => $sql_lib->retr('genomics_consent'),
);
my @lab_sections = do {
    my @args = ( 'lab_sections', ['section_name'], { is_active => 'yes' } );
    $dbix->select(@args)->column; # array
}; # p \@lab_sections;

# get request ids:
my @request_ids = $dbix->select('requests', 'id')->column; # p \@request_ids; exit;

{ # go:
    # get requests 1-to-1 data:
    my @requests = $dbix->query( $SQL{demographics}, @request_ids )->hashes;
    # p \@requests; exit;

	REQUEST:
    for my $req (@requests) { # p $req; # get results data:
        my $request_id = $req->{request_id};

         # allocate GeL/participant id if screened and not already done:
        if ( $req->{participant_id} eq 'UNKNOWN' ) { # unit number still at default
            if ( my $arm = $req->{arm} ) { # if screened
               my $base_id = $genomics_uat->{rare_disease}; # 122_000_000
                # add cancer increment if screened as cancer (ie to make 223_000_000)
                $base_id += $genomics_uat->{cancer_incr} if lc $arm eq 'cancer';
                $req->{participant_id} = $base_id + $request_id;
            }
            else {
                delete $req->{participant_id}; # will be invalid xml entry 'UNKNOWN'
            }
        }

        my %req_fields = map +($_ => $req->{$_}), # skip unrequired fields:
            grep { $_ !~ /\b(year|request_number|patient_case_id)\b/ }
                # skip undef vals to avoid uninitialized val warns from XML::Simple
                grep defined $req->{$_}, keys %$req; # p \%req_fields;

		# remove trailing 'cancer' from disease_type_registration:
		$req_fields{disease_type_registration} =~ s/\scancer\Z//i
            if $req_fields{disease_type_registration}; # only exists for cancer
        # lab number:
        $req_fields{lab_number} = join '/',
            $req->{request_number}, $req->{year} - 2000; # p \%req_fields;

        # build data structure for this request:
        my %h = ( Demographics => \%req_fields ); # p \%h;

        SECTION:
        for my $section_name ( @lab_sections ) { # p $section_name;
            my @data = do {
                my @bind = ( $request_id, $section_name );
                $dbix->query( $SQL{results}, @bind )->hashes;
            }; # p \@data;

			# remove any non-word chars from section name -> CamelCase:
            my $section_element_name = join '', map ucfirst $_,
                split '\W', $section_name; # p $element_name;

            RESULT: # each test/result pair for this section (query ensures no undef vals)
            for my $ref (@data) { # hashref of keys: test_name & result
                my $test_name = $ref->{test_name};
                $h{$section_element_name}{$test_name} = $ref->{result};
            }
        }
        # fluidx storage (1-to-many with request_id):
        if ( my @data = $dbix->query( $SQL{storage}, $request_id )->hashes ) {
            # ensure no undef values or XML::Simple v2.21+ issues uninitialized value warnings:
            remove_undef_values($_) for @data;
            $h{Storage}{Vials}{vial} = \@data; # p \@data;
        }
        { # consent taken & withdrawn - from request_lab_test_history file:
            my $data = $dbix->query( $SQL{consent}, $request_id )
				->map_hashes('action');
			if ( my $event = $data->{consent_given} ) {
				my @user = @{$event}{ qw/first_name last_name/ };
				$h{Consent}{consent_taken_by}
					= join ' ', map ucfirst $_, @user;
			}
			if ( my $event = $data->{consent_withdrawn} ) { # infrequently used:
				my @user = @{$event}{ qw/first_name last_name/ };
				$h{ConsentWithdrawal}{withdrawal_taken_by}
					= join ' ', map ucfirst $_, @user;
			}
        } # p \%h;
        { # consent form filename (need to allow for >1 file):
            my $filepath = get_destination_sub_dir($req); # p $filepath;
            if ( -e $filepath ) {
                my @contents = io($filepath)->all;

                FILE:
                for my $f (@contents) { # p $f;
                    next FILE unless $f->type eq 'file';
					my $filename = join '~', $request_id, $f->filename;
                    # add as array in case >1 file in dir
                    push @{ $h{Consent}{Files}{file} }, $filename;
                }
            }
        } # p \%h;

        my $lab_no = $req_fields{lab_number};

        # validate params before creating xml:
        if ( my $errs = validate_params(\%h) ) { # p $errs; # arrayref
            say $lab_no, ' ' , $_ for @$errs;
            next REQUEST;
        } # p \%h;

       # create xml:
        my $xml = to_xml(\%h); # say $xml;

		# validate xml, returns XML::SAX::Exception object on failure, undef on success:
		if ( my $error = validate_xml_against_xsd($xml) ) {
    		say $lab_no, ' ' , $error;
        }
        else {
            say $lab_no, ' validated OK';
        }
    }
}

#-------------------------------------------------------------------------------
sub to_xml {
    my $data = shift; # arrayref of request datasets

	# remove leading/trailing white-space (eg introduced by pasting from excel):
	_trim($data);

    # options for XMLout (needs array or hash):
    my %xs_opts = xs_options(); # p %xs_opts;
    my $xs = Local::XMLSimple->new(%xs_opts);

    my $ref = {
        FileCreationDateTime => $timestamp->datetime, # require string
        # add direct if NOT using xml element names, otherwise use format_request():
        # Record  => format_request($data),
        Record    => $data,
    }; # p $ref;

    # enclose xml in outer <add> block; add 'version' inline:
    # my $input = { add => $ref, version => 2 };
   	my $xml_out = $xs->XMLout($ref); # p $xml_out;
    return $xml_out;
}

#-------------------------------------------------------------------------------
sub validate_xml_against_xsd {
    my $xml = shift;
	# switch debug on to show data structure:
    my $validator = XML::Validator::Schema->new(file => $xsd_src, debug => 0); # p $validator;
    my $parser    = XML::SAX::ParserFactory->parser(Handler => $validator);
    eval { $parser->parse_string($xml); };
    return $@;
}

#-------------------------------------------------------------------------------
sub xs_options {
    my %opts = (
        SuppressEmpty => 0, # ? only works for xml_in
        NoEscape      => 0, # should be default anyway, but doesn't work
        RootName      => 'HILIS4Genomics',
        XMLDecl       => q!<?xml version="1.0" encoding="UTF-8"?>!,
        KeyAttr       => [],
        NoAttr        => 1, # gives inline (scalar) AND nested (arrayref) attributes
    );
    return %opts;
}

#-------------------------------------------------------------------------------
sub get_destination_sub_dir {
    my $ref = shift; # p $ref;

    my ($request_num, $year) = ($ref->{request_number}, $ref->{year}); # p [$year, $request_num];

    my $i = int ( $request_num / 100 ); # warn $i; # 1-99 = 0, 100-199 = 1, 200-299 = 2, etc

    my $mini_dir = sprintf '%s-%s',
        100 * $i || 1, # default to 1 if 0; 1, 100, 200, 300, etc
        100 * $i + 99; # 99, 199, 299, etc

    my $app_dir  = path($tools->path_to_app_root)->realpath; # p $app_dir;
    my $rel_path = 'static/files/genomics';

    my $destination_dir = join '/', # eg /path/to/uploads/2013
        $app_dir,
        $rel_path,
        $year,
        $mini_dir,
        $request_num; # p $destination_dir;
    return $destination_dir;
}

sub remove_undef_values { # based on kaitlyn's transform_datetime_to_string() below:
    my $r = shift; # p $r;
    return unless ref $r eq 'HASH';

    for my $key( keys %$r ) { # p [$key, $r->{$key}];
        if ( ref $r->{$key} eq 'HASH' ) { # recursive call for hashrefs
            remove_undef_values( $r->{$key} );
        }
        defined $r->{$_} || delete $r->{$_} for keys %$r; # p $r;
    }
}

sub validate_params {
    my $data = shift; # p $data; # hashref

    my %h;

	my $c = clone $data; # to prevent auto-vivification
    # family ID is 9 digits & begins with 1st 3 digits of rare_disease
    # participant id base:
    if ( my $family_id = $c->{Approach}->{family_id} ) { # warn $family_id;
        # family_id should be participant_id + max 9 individuals:
            # doesn't work in practice as family_id just has to match a
            # participant_id which could be any number of requests different
        # my $participant_id = $c->{Demographics}->{participant_id};
        # unless ( $participant_id - $family_id ~~ [0..9] ) {
        #    p [ $participant_id, $family_id ];
        #    $h{family_id} = $family_id;
        # }
        # check family_id belongs to existing participant_id (unit_number):
        $dbix->query('select 1 from patient_case where unit_number = ?',
            $family_id)->into(my $pid_match_ok);
        # family_id_prefix is 1st 3 digits of rare_disease participant id base:
        unless ( $pid_match_ok && $family_id =~ /^$family_id_prefix/ ) {
            $h{family_id} = $family_id;
        }
    }
    # participant ID is 9 digits and matches allocated RD or Cancer block:
    if ( my $participant_id = $c->{Demographics}->{participant_id} ) { # p $participant_id;
        my $request_id = $c->{Demographics}->{request_id};
        my $expected   = $genomics_uat->{rare_disease} + $request_id;
        if ( my $arm = $c->{Demographics}->{arm} ) {
            $expected += $genomics_uat->{cancer_incr} if lc $arm eq 'cancer';
        } # p $expected;
        else { # not screened:
            $expected = 0; # ensures failure even if length = 9
        }
        $h{particiant_id} = $participant_id if length $participant_id != 9
            or ! _matches($participant_id, $expected);
    }
    my @errs = map {
        sprintf q!Illegal value '%s' in element <%s>, does not match required !
            . ' pattern.', $h{$_}, $_;
    } keys %h;
    return @errs ? \@errs : 0; # don't return empty arrayref (evals 'true' for caller)
}

sub _matches { Local::Utils::matches(@_) }

sub _trim {
	my $data = shift; # p $data;
    KEY:
	for my $key ( keys %$data ) { #	say "$key: $data->{$key}"; # p $data->{$key};
        if ( ref($data->{$key}) eq 'HASH' ) { # recursive call for hashrefs
			# warn $data->{$key};
            _trim( $data->{$key} ); # p $r->{$key};
			next KEY; # not sure why this is necessary ??
        } # say "$key: $data->{$key}";
		# remove leading/trailing space chars (incl. unicode 160) from string:
		$data->{$key} =~ s/^\p{Space}//g;
		$data->{$key} =~ s/\p{Space}$//g; # p $data->{$key};
	} # p $data;
	return 0; # no need - modifying hashref in situ
}