# populates hilis4.ngis_lab_test using $src
# maps hilis4.lab_test_id to NGIS test id (M91.9, M80.5, etc)
# requires ngis_lab_test table (setup/geneq.sql)
# OK to re-run UNLESS data updated through app - deletes and rebuilds ngis_lab_test
use strict;
use warnings;
use Getopt::Std;
getopts('q');
our($opt_q);
# switch on sql output to console:
$ENV{SQL_TRACE} = $opt_q;
#===============================================================================
my $src = 'HILIS_Genetic_Tests.xlsx';
#===============================================================================
use Spreadsheet::Read qw(ReadData rows);
use FindBin qw($Bin);
use Data::Printer;
use Modern::Perl;
use lib '/home/raj/perl-lib';
use Local::DB;
my $dbix = Local::DB->dbix({ dbname => 'hilis4' });
my $xls = ReadData($Bin.'/'.$src) || die "cannot read $src"; # p $xls; exit;
my $sheet = $xls->[1]; # p $sheet; # 1st sheet
my @rows = rows($sheet); # p @rows; # AoA
my $headers = shift @rows; # p $headers; exit;
my %all_tests;
# clear ngis_lab_test table:
$dbix->dbh->do('TRUNCATE TABLE ngis_lab_test');
my $lab_sections = $dbix->select('lab_sections', [ qw/section_name id/ ])->map; # p $lab_sections;
ROW:
for my $row (@rows) { # arrayref
my $ref = _to_hash($row); # p $ref; # next;
# get data from row:
my $type = $ref->{test_type};
next ROW if not $type; # 3 external rows to handle separately
my $test = $ref->{field_label};
# substitute lab-test names from spreadsheet to match lab_tests table:
$test =~ s/&/&/g;
$test =~ s/MLPA quantification/Quantification \(selected\)/;
$test =~ s/MLPA myeloma/Myeloma/;
$test =~ s/MLPA CLL/Pre-treatment CLL \(P038\)/;
# $test =~ s/1(4q32.33)/$1/; # this is incorrect
$test =~ s/(FISH H & E)/$1 review/;
my $section = $ref->{section_name}; # p $section;
# substitute section names from spreadsheet to match lab_sections table:
$section =~ s/MLPA/Multiplex Ligation-dependent Probe Amplification/;
$section =~ s/NGS/High-throughput sequencing/;
# get lab_tests.id from hilis:
my $test_id = do {
my %h = (
field_label => $test,
test_type => $type,
lab_section_id => $lab_sections->{$section},
); # p %h;
$dbix->select('lab_tests', 'id', \%h)->value;
};
say "no id for $test" and next ROW if not $test_id;
# get all NGIS id's from row (ALL, MDS, AA, NMML, etc):
my @ngis_ids = grep $_, @$row[3..18]; # p @ngis_ids;
# say "no NGIS id for $test" and next ROW if not @ngis_ids;
{ # create hash of all NGIS id's, values = count (for duplicates)
my %h;
for my $id (@ngis_ids) {
if ( my @multi = split / /, $id ) {
$h{$_}++ for @multi;
}
else {
$h{$id}++;
}
}
# check for duplicate NGIS id's for lab-test before proceeding:
die "$test has multiple NGIS ids" if grep $h{$_} > 1, keys %h;
# new %all_tests entry (array of NGIS id's):
$all_tests{$test_id} = [ keys %h ];
}
} # p %all_tests;
# write %all_tests to table:
while ( my ($hilis_id, $ref) = each %all_tests ) { # scalar, aref
for my $ngis_code (@$ref) {
my %h = ( lab_test_id => $hilis_id, ngis_test_code => $ngis_code );
$dbix->insert('ngis_lab_test', \%h );
}
}
sub _to_hash {
my $ref = shift; # p $ref; # arrayref
my %h;
my $i = 0;
for (@$headers) { # p $_;
$h{$_} = $ref->[$i++];
} # p %h;
return \%h;
}