P3X GitList

# READ FIRST:

# updates BHODS diagnoses table; requires 2 files:
# * bristol-diagnostic-terms-<date>.xls (export of remote Bristol HILIS4 diagnoses table)
# * bristol-diagnostic-terms-<date>-update.xls
# creates new diagnoses table in lims_test db
# run update-diagnoses-table.sql to integrate with main db - DO NOT RENAME TABLE
# dump output to file:
# perl update-diagnoses-table.pl > update-bhods-diagnoses.txt 2>&1

use Modern::Perl;
use DBIx::Simple;
use Data::Dumper;
use Data::Compare;
use Data::Printer;
use FindBin qw($Bin);
use Spreadsheet::Read;
use Spreadsheet::XLSX; # for above to parse XSLX files

use lib '/home/raj/perl-lib';
use Local::DB;

my $dbix = Local::DB->dbix({ dbname => 'lims_test' });

#==============================================================================
my $table = 'lims_test.diagnoses';

my $dbh = $dbix->dbh;
$dbh->do("DROP TABLE IF EXISTS $table");
$dbh->do("CREATE TABLE $table LIKE bristol.diagnoses");
$dbh->do("INSERT INTO $table SELECT * FROM bristol.diagnoses");
#==============================================================================
# exit;

my $src = 'bristol-diagnostic-terms-Mar-2018.xlsx';
my $new = 'bristol-diagnostic-terms-Mar-2018-update.xlsx';

my $c = new Data::Compare;

my $rows_src = _get_rows_as_hrefs($src); # p $rows_src; # exit;
my $rows_new = _get_rows_as_hrefs($new); # p $rows_new; exit;

ROW:
for my $id (sort alphanum keys %$rows_new) { # p $id;
    my $xl_row = $rows_new->{$id}; # p $xl_row;
    my $db_row = $rows_src->{$id}
        or die "no db data for update file id $id"; # p $db_row;

    my @cols = qw(diagnostic_category_id active icdo3 name);
    my %original = map { $_ => $db_row->{$_} } @cols; # p %original;
    my %update   = map { $_ => $xl_row->{$_} } @cols; # p %update;

    # set defaults in case empty:
    $original{icdo3} ||= 'NULL';
    $update{active}  ||= 'yes';
    $update{icdo3}   ||= 'NULL';

    next if $c->Cmp(\%update, \%original);
        # p %original;
        # p %update;
    say "ID: $id";

    unless ( $original{name} eq $update{name} ) {
        say "old name $original{name}";
        say "new name $update{name}";
        $dbix->update($table, { name => $update{name} },
            { id => $id });
    }
    unless ( lc $original{active} eq lc $update{active} ) {
        say "old active $original{active}";
        say "new active $update{active}";
        $dbix->update($table, { active => lc $update{active} },
            { id => $id });
    }
    unless ( $original{icdo3} eq $update{icdo3} ) {
        say "old icdo3 $original{icdo3}";
        say "new icdo3 $update{icdo3}";
        my $icdo3 = ( !$update{icdo3} || $update{icdo3} eq 'NULL' )
            ? undef : $update{icdo3};
        $dbix->update($table, { icdo3 => $icdo3 },
            { id => $id });
    }
    say "diagnostic categories don't match" unless
        $update{diagnostic_category_id} == $original{diagnostic_category_id};
    say '=' x 60;
}

{ # add new diagnostic terms:
    my $rows = _get_rows($new); # p $rows;
    ROW:
    for my $ref (@$rows) {
        next ROW if $ref->[0]; # id, already done
        my ($name, $icdo3, $diagnostic_category_id, $active) =
            map $ref->[$_], (1,2,3,5);

        if ( my $ref = $dbix->select($table, '*', { name => $name })->hash ) {
            $ref->{icdo3} ||= 'NULL'; # for stdout
            next ROW if $ref->{icdo3} eq $icdo3 && $ref->{active} eq lc $active;
            say '=' x 60;
            say "$name is duplicate";
            say "updating ICDO3 from $ref->{icdo3} to $icdo3"
                if $ref->{icdo3} ne $icdo3;
            say "updating active col from $ref->{active} to $active"
                if $ref->{active} ne $active;
            $icdo3 = undef if $icdo3 eq 'NULL'; # for db input
            $dbix->update($table, { icdo3 => $icdo3, active => lc $active },
                { name => $name });
            say '=' x 60;
            next ROW;
        }
        my %h = (
            name  => $name,
            icdo3 => $icdo3 eq 'NULL' ? undef : $icdo3,
            diagnostic_category_id => $diagnostic_category_id,
            active => lc $active,
        ); # p %h;
        say qq!inserting new diagnosis "$name"!;
        $dbix->insert($table, \%h);
    }
}

sub alphanum { return $a <=> $b }

sub _get_rows_as_hrefs {
    my $src = shift;

    my $rows = _get_rows($src);

    my $headers = shift @$rows; # p $headers;
    $_ =~ s/\s/_/ for @$headers;
    $_ =~ s/(\w+)/lc $1/eg for @$headers;
    $_ =~ s/new_terms/name/ for @$headers; # col header in update file

    my %data;
    for my $aref(@$rows) {
        my $id = $aref->[0] || next; # p $id; # skip new rows without id
        my %h;
        my $i = 0;
        # index by row id:
        $h{$_} = $aref->[$i++] for @$headers; # p %h;
        $data{$id} = \%h;
    }
    return \%data;
}

sub _get_rows {
    my $src = shift;
    my $xls = ReadData($Bin . '/' . $src); # p $xls;
    my $sheet = $xls->[1]; # p $sheet; # 1st sheet
    # rows() not exported so call as fully qualified method:
    my @rows = Spreadsheet::Read::rows($sheet); # p @rows; # AoA
    return \@rows;
}