# populates hilis4.ngis_lab_test using $src # maps hilis4.lab_test_id to NGIS test id (M91.9, M80.5, etc) # requires ngis_lab_test table (setup/geneq.sql) # OK to re-run UNLESS data updated through app - deletes and rebuilds ngis_lab_test use strict; use warnings; use Getopt::Std; getopts('q'); our($opt_q); # switch on sql output to console: $ENV{SQL_TRACE} = $opt_q; #=============================================================================== my $src = 'HILIS_Genetic_Tests.xlsx'; #=============================================================================== use Spreadsheet::Read qw(ReadData rows); use FindBin qw($Bin); use Data::Printer; use Modern::Perl; use lib '/home/raj/perl-lib'; use Local::DB; my $dbix = Local::DB->dbix({ dbname => 'hilis4' }); my $xls = ReadData($Bin.'/'.$src) || die "cannot read $src"; # p $xls; exit; my $sheet = $xls->[1]; # p $sheet; # 1st sheet my @rows = rows($sheet); # p @rows; # AoA my $headers = shift @rows; # p $headers; exit; my %all_tests; # clear ngis_lab_test table: $dbix->dbh->do('TRUNCATE TABLE ngis_lab_test'); my $lab_sections = $dbix->select('lab_sections', [ qw/section_name id/ ])->map; # p $lab_sections; ROW: for my $row (@rows) { # arrayref my $ref = _to_hash($row); # p $ref; # next; # get data from row: my $type = $ref->{test_type}; next ROW if not $type; # 3 external rows to handle separately my $test = $ref->{field_label}; # substitute lab-test names from spreadsheet to match lab_tests table: $test =~ s/&/&/g; $test =~ s/MLPA quantification/Quantification \(selected\)/; $test =~ s/MLPA myeloma/Myeloma/; $test =~ s/MLPA CLL/Pre-treatment CLL \(P038\)/; # $test =~ s/1(4q32.33)/$1/; # this is incorrect $test =~ s/(FISH H & E)/$1 review/; my $section = $ref->{section_name}; # p $section; # substitute section names from spreadsheet to match lab_sections table: $section =~ s/MLPA/Multiplex Ligation-dependent Probe Amplification/; $section =~ s/NGS/High-throughput sequencing/; # get lab_tests.id from hilis: my $test_id = do { my %h = ( field_label => $test, test_type => $type, lab_section_id => $lab_sections->{$section}, ); # p %h; $dbix->select('lab_tests', 'id', \%h)->value; }; say "no id for $test" and next ROW if not $test_id; # get all NGIS id's from row (ALL, MDS, AA, NMML, etc): my @ngis_ids = grep $_, @$row[3..18]; # p @ngis_ids; # say "no NGIS id for $test" and next ROW if not @ngis_ids; { # create hash of all NGIS id's, values = count (for duplicates) my %h; for my $id (@ngis_ids) { if ( my @multi = split / /, $id ) { $h{$_}++ for @multi; } else { $h{$id}++; } } # check for duplicate NGIS id's for lab-test before proceeding: die "$test has multiple NGIS ids" if grep $h{$_} > 1, keys %h; # new %all_tests entry (array of NGIS id's): $all_tests{$test_id} = [ keys %h ]; } } # p %all_tests; # write %all_tests to table: while ( my ($hilis_id, $ref) = each %all_tests ) { # scalar, aref for my $ngis_code (@$ref) { my %h = ( lab_test_id => $hilis_id, ngis_test_code => $ngis_code ); $dbix->insert('ngis_lab_test', \%h ); } } sub _to_hash { my $ref = shift; # p $ref; # arrayref my %h; my $i = 0; for (@$headers) { # p $_; $h{$_} = $ref->[$i++]; } # p %h; return \%h; }