RSS Git Download  Clone
Raw Blame History
# populates hilis4.ngis_lab_test using $src
# maps hilis4.lab_test_id to NGIS test id (M91.9, M80.5, etc)
# requires ngis_lab_test table (setup/geneq.sql)
# OK to re-run UNLESS data updated through app - deletes and rebuilds ngis_lab_test

use strict;
use warnings;

use Getopt::Std;
getopts('q');
our($opt_q);

# switch on sql output to console:
$ENV{SQL_TRACE}  = $opt_q;

#===============================================================================
my $src = 'HILIS_Genetic_Tests.xlsx';
#===============================================================================

use Spreadsheet::Read qw(ReadData rows);
use FindBin qw($Bin);
use Data::Printer;
use Modern::Perl;

use lib '/home/raj/perl-lib';
use Local::DB;

my $dbix = Local::DB->dbix({ dbname => 'hilis4' });
my $xls  = ReadData($Bin.'/'.$src) || die "cannot read $src"; # p $xls; exit;

my $sheet = $xls->[1];     # p $sheet; # 1st sheet
my @rows  = rows($sheet);  # p @rows; # AoA
my $headers = shift @rows; # p $headers; exit;

my %all_tests;

# clear ngis_lab_test table:
$dbix->dbh->do('TRUNCATE TABLE ngis_lab_test');

my $lab_sections = $dbix->select('lab_sections', [ qw/section_name id/ ])->map; # p $lab_sections;
ROW:
for my $row (@rows) { # arrayref
    my $ref = _to_hash($row); # p $ref; # next;
    # get data from row:
    my $type = $ref->{test_type};
    next ROW if not $type; # 3 external rows to handle separately

    my $test = $ref->{field_label};
    # substitute lab-test names from spreadsheet to match lab_tests table:
    $test =~ s/&/&/g;
    $test =~ s/MLPA quantification/Quantification \(selected\)/;
    $test =~ s/MLPA myeloma/Myeloma/;
    $test =~ s/MLPA CLL/Pre-treatment CLL \(P038\)/;
    # $test =~ s/1(4q32.33)/$1/; # this is incorrect
    $test =~ s/(FISH H & E)/$1 review/;

    my $section = $ref->{section_name}; # p $section;
    # substitute section names from spreadsheet to match lab_sections table:
    $section =~ s/MLPA/Multiplex Ligation-dependent Probe Amplification/;
    $section =~ s/NGS/High-throughput sequencing/;

    # get lab_tests.id from hilis:
    my $test_id = do {
        my %h = (
            field_label    => $test,
            test_type      => $type,
            lab_section_id => $lab_sections->{$section},
        ); # p %h;
        $dbix->select('lab_tests', 'id', \%h)->value;
    };
    say "no id for $test" and next ROW if not $test_id;

    # get all NGIS id's from row (ALL, MDS, AA, NMML, etc):
    my @ngis_ids = grep $_, @$row[3..18]; # p @ngis_ids;
    # say "no NGIS id for $test" and next ROW if not @ngis_ids;

    { # create hash of all NGIS id's, values = count (for duplicates)
        my %h;
        for my $id (@ngis_ids) {
            if ( my @multi = split / /, $id ) {
                $h{$_}++ for @multi;
            }
            else {
                $h{$id}++;
            }
        }
        # check for duplicate NGIS id's for lab-test before proceeding:
        die "$test has multiple NGIS ids" if grep $h{$_} > 1, keys %h;
        # new %all_tests entry (array of NGIS id's):
        $all_tests{$test_id} = [ keys %h ];
    }
} # p %all_tests;

# write %all_tests to table:
while ( my ($hilis_id, $ref) = each %all_tests ) { # scalar, aref
    for my $ngis_code (@$ref) {
        my %h = ( lab_test_id => $hilis_id, ngis_test_code => $ngis_code );
        $dbix->insert('ngis_lab_test', \%h );
    }
}

sub _to_hash {
    my $ref = shift; # p $ref; # arrayref
    my %h;
    my $i = 0;
    for (@$headers) { # p $_;
        $h{$_} = $ref->[$i++];
    } # p %h;
    return \%h;
}