package LIMS::Local::GenomicsValidation; use Moo; use Local::Utils; use Clone qw(clone); use Data::Printer alias => 'p'; use XML::SAX::ParserFactory; use XML::Validator::Schema; has xsd_src => ( is => 'ro', required => 1 ); has dbix => ( is => 'ro', required => 1 ); has cfg => ( is => 'ro', required => 1 ); has rare_disease_base => ( is => 'lazy' ); has family_id_prefix => ( is => 'lazy' ); has cancer_increment => ( is => 'lazy' ); has lab_tests_map => ( is => 'lazy' ); sub _build_cancer_increment { shift->cfg->{genomics}->{uat}->{cancer_incr} } sub _build_rare_disease_base { shift->cfg->{genomics}->{uat}->{rare_disease} } sub _build_family_id_prefix { my $self = shift; # family_id_prefix = 1st 3 digits of rare_disease participant id base: my ($family_id_prefix) = $self->rare_disease_base =~ /^(\d{3})/; # p $family_id_prefix; return $family_id_prefix; } sub _build_lab_tests_map { # warn '=' x 30; my $self = shift; my $sql = q!SELECT lt.test_name, lt.field_label, ls.section_name FROM lab_tests lt JOIN lab_sections ls on lt.lab_section_id = ls.id!; $self->dbix->query($sql)->hash_map('test_name'); } no autovivification; sub validate_params { my ($self, $data) = @_; # p $data; # hashref my $lab_tests_map = $self->lab_tests_map; # p $lab_tests_map; # may add NHS number later my %errors; my $c = clone $data; # to prevent auto-vivification # family ID (is RD-specific) is 9 digits & begins with 1st 3 digits of # rare_disease participant id base (122xxxxxx): if ( my $family_id = $c->{Approach}->{family_id} ) { # warn $family_id; =begin # family_id should be participant_id + max 9 individuals, but doesn't # work in practice as family_id just has to match a participant_id which # could be any number of requests different my $participant_id = $c->{Demographics}->{participant_id}; unless ( $participant_id - $family_id ~~ [0..9] ) { # p [ $participant_id, $family_id ]; $h{family_id} = $family_id; } =cut if ( my $err_msg = $self->_family_id_check($c) ) { my %h = ( val => $family_id, err => $err_msg ); $errors{family_id} = \%h; } } # participant ID is 9 digits and matches allocated RD or Cancer block: if ( my $participant_id = $c->{Demographics}->{participant_id} ) { # p $participant_id; my $request_id = $c->{Demographics}->{request_id}; my $expected = $self->rare_disease_base + $request_id; if ( my $arm = $c->{Demographics}->{arm} ) { $expected += $self->cancer_increment if lc $arm eq 'cancer'; } # p $expected; else { # not screened: $expected = 0; # ensures failure even if length = 9 } my $match_ok = _matches($participant_id, $expected); unless ( $match_ok && length $participant_id == 9 ) { my $err = $expected # non-value means request unscreened ? $match_ok # must be invalid length if match_ok ? 'does not match expected length (9 characters)' : 'does not match expected numerical range for disease type' : 'unexpected value for unscreened record'; my %h = ( val => $participant_id, err => $err, ); $errors{participant_id} = \%h; # need to add participant_id (ie unit number) manually to # $self->lab_test_map for error decode for notification: $self->lab_tests_map->{participant_id} ||= { # unless already added section_name => 'Patient demographics', # not strictly a lab-section field_label => 'Unit number', }; # p $map; } } # p $c->{Demographics}; # nhs number: unless ( $c->{Demographics}->{nhs_number} ) { # need to add nhs_number to $lab_tests_map for use in generation of @errs # as it's not a 'lab-test' but is still validated here with lab-tests; # can't do NHSnum validation in xml as it would require allowance of undef # vals which causes various uninitialized warnings $lab_tests_map->{nhs_number}{field_label} = 'NHS number'; $lab_tests_map->{nhs_number}{section_name} = 'Patient details'; $errors{nhs_number} = undef; } =begin # validated in validate_xml_against_xsd() using XML libs now # DNA/volume_stored is xs:small (integer): if ( my $val = $data->{DNA}->{volume_stored} ) { unless ($val =~ /^\d+$/) { # warn 'here'; $data->{DNA}->{volume_stored} = int($val); } } # DNA/edta1_fluidx_vol is xs:small (integer): if ( my $val = $data->{DNA}->{edta1_fluidx_vol} ) { unless ($val =~ /^\d+$/) { # warn 'here'; $data->{DNA}->{edta1_fluidx_vol} = int($val); } } =cut { # sample ID's (9-digits, decimal, 1/2-digits: my %data = (); { # Specimens - blood section: $data{$_} = $c->{SpecimensBlood}->{$_} for qw( paxgene_rna edta1 edta2 sst pst ); } { # Specimens - saliva section: $data{$_} = $c->{SpecimensSaliva}->{$_} for qw( oragene_tube_1 oragene_tube_2 ); } # tumour specimen ID: $data{sample_id} = $c->{SpecimensTumour}->{sample_id}; # p \%data; # bone marrow specimen ID: $data{bm_sample_id} = $c->{SpecimensBoneMarrow}->{bm_sample_id}; while ( my ($test, $val) = each %data ) { # warn $test; # warn $val; next unless $val && $val !~ /^\d{9}\.\d{1,2}$/; $errors{$test} = { val => $val, err => undef }; # use default err msg } } # p \%errors; my $default_error = 'does not match required definition'; my @errs = map { # same format as genomics_xml.pl notify_admin msg: ( $errors{$_} ? ( sprintf qq!Illegal value '%s' !, $errors{$_}{val} ) : 'Illegal NULL value ' ) . sprintf q!for '%s' in '%s' section, %s.!, $lab_tests_map->{$_}{field_label}, $lab_tests_map->{$_}{section_name}, $errors{$_}{err} || $default_error; } keys %errors; return @errs ? \@errs : 0; # don't return empty arrayref (evals 'true' for caller) } #------------------------------------------------------------------------------- sub validate_xml_against_xsd { my ($self, $xml) = @_; # warn $xml; my $xsd_src = $self->xsd_src; # switch debug on to show data structure: my $validator = XML::Validator::Schema->new(file => $xsd_src, debug => 0); # p $validator; my $parser = XML::SAX::ParserFactory->parser(Handler => $validator); eval { $parser->parse_string($xml); }; return $@; } #------------------------------------------------------------------------------- sub reformat_error { my ($self, $error) = @_; my ($lab_test) = $error =~ m!element <(.*)>!; # p $lab_test; my $lab_tests_map = $self->lab_tests_map; # p $lab_tests_map; if ( my $ref = $lab_tests_map->{$lab_test} ) { # get field label & section my $section = $ref->{section_name}; my $label = $ref->{field_label}; $error =~ s/in element <$lab_test>/for '$label' in '$section' section/; # p $error; } return $error; } sub _matches { Local::Utils::matches(@_) } sub _family_id_check { my ($self, $c) = @_; my $family_id = $c->{Approach}->{family_id}; # existence already confirmed my $trial_arm = $c->{Demographics}->{arm}; # p $trial_arm; # family_id_prefix is 1st 3 digits of rare_disease participant id base: my $rd_prefix = $self->family_id_prefix; # p $prefix; # must be rare-disease type: if ( ! $trial_arm || lc $trial_arm ne 'rare disease' ) { return 'family ID only valid for rare disease category'; } # must begin with rare-disease prefix (122) + 6 digits: elsif ( $family_id !~ /^$rd_prefix\d{6}$/ ) { return 'invalid format (require 122xxxxxx)'; } # must match an existing participant_id (unit_number): else { my @query_args = ( 'patient_case', { unit_number => $family_id } ); unless ( $self->dbix->count(@query_args) ) { # count - returns 1 or 0 return 'no matching proband'; } } return 0; } 1;