#!/usr/bin/perl
=begin
* synchronises outreach data with external database
* replaces patient table with anonymised version
* dump queries to stdout using '-q'
* not necessary to use PK => FK mappings as tables are copied from source, also
better NOT to use them if using REPLACE INTO due to significant overhead with indexes
=cut
BEGIN {
use Getopt::Std;
getopts('q'); # query output to stdout
our $opt_q;
$ENV{SQL_TRACE} = $opt_q; # warn $ENV{SQL_TRACE}; exit;
}
use lib (
'/home/raj/perl5/lib/perl5',
'/home/raj/perl-lib',
);
use Local::SQL::Abstract::Plugin::InsertMulti; # adds replace_multi() to SAPIM
use Data::Printer alias => 'p', use_prototypes => 0;
use SQL::Abstract::More;
use Modern::Perl;
use Local::Utils;
use Path::Tiny;
use Local::DB;
use IO::All;
# set some package vars (MySQL dates not DT objects & don't log queries):
$Local::QueryLogger::NO_QUERY_LOGS = 1; # don't need queries in logs dir
$Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # don't want DT objects
my $db_name = 'outreach_patient_access';
my $dbix = Local::DB->dbix({ dbname => $db_name });
my $sqla = SQL::Abstract::More->new;
my $ref_date = Local::Utils::today->subtract(days => 1); # p $ref_date->dmy;
$dbix->lc_columns = 0; # preserve alias cases
# patient-related tables =======================================================
{
my @col_names = (
'p.id|patientId',
'UPPER( CONCAT( LEFT(p.first_name, 1), LEFT(p.last_name, 1) ) )|initials',
'p.dob',
'p.gender',
'p.updated_at',
'pc.id|pcId',
'pc.referral_source_id|refSrcId',
'pc.time|pcTime',
'pd.address',
'pd.post_code',
'pd.contact_number',
'pd.gp_id',
'pd.practice_id',
'pd.time|pdTime',
'rs.id|practiceId',
'rs.display_name|practiceName',
'rs.organisation_code|practiceCode',
'gp.id|gpId',
'gp.name|gpName',
'gp.national_code|gpCode',
);
my @tbl_rels = (
# table|alias # FK-PK relationship
'hilis4.requests|r' , 'r.patient_case_id = pc.id' ,
'hilis4.patient_case|pc' , 'pc.patient_id = p.id' ,
'hilis4.patients|p' , '=>pd.patient_id = p.id' , # left join
'hilis4.patient_demographics|pd' , 'pd.practice_id = rs.id' ,
'hilis4.referral_sources|rs' , 'pd.gp_id = gp.id' ,
'hilis4.referrers|gp' , 'ris.request_id = r.id' ,
'hilis4.request_initial_screen|ris' , 'ris.screen_id = s.id' ,
'hilis4.screens|s'
);
my %where = (
's.description' => { -in => ['Outreach','Community monitoring'] },
'pd.status' => 'alive',
'-or' => {
'r.created_at' => { '>=' => $ref_date->ymd },
'p.updated_at' => { '>=' => $ref_date->ymd },
'pc.time' => { '>=' => $ref_date->ymd },
'pd.time' => { '>=' => $ref_date->ymd },
}
);
my @group = 'p.id';
my @sort = 'p.id';
my ($sql, @bind) = get_query_params(
cols => \@col_names,
joins => \@tbl_rels,
where => \%where,
group => \@group,
sort => \@sort,
); # p $sql; p \@bind;
if ( my @data = $dbix->query($sql, @bind)->hashes ) { # p \@data;
update_patient_tables(@data);
}
}
# request-related tables =======================================================
{
my @col_names = qw(
r.id|requestId
r.request_number
r.year
r.patient_case_id
r.status_option_id
r.created_at|registered
r.updated_at
rf.followup_option_id
rf.time|rfTime
pd.pack_due
pd.pack_sent
pd.return_due
pd.time|pdTime
);
my @tbl_rels = (
# table|alias # FK-PK relationship
'hilis4.requests|r' , 'ris.request_id = r.id' ,
'hilis4.request_initial_screen|ris' , 'ris.screen_id = s.id' ,
'hilis4.screens|s' , '=>rf.request_id = r.id' ,
'outreach.request_followup|rf' , '=>rf.followup_option_id = o.id' ,
'outreach.followup_options|o' , '=>pd.request_id = r.id' ,
'outreach.request_pack_dispatch|pd'
);
my %where = (
's.description' => { -in => ['Outreach','Community monitoring'] },
'-or' => {
'r.updated_at' => { '>=' => $ref_date->ymd },
'r.created_at' => { '>=' => $ref_date->ymd },
'rf.time' => { '>=' => $ref_date->ymd },
'pd.time' => { '>=' => $ref_date->ymd },
}
);
my @group = 'r.id';
my @sort = 'r.id';
my ($sql, @bind) = get_query_params(
cols => \@col_names,
joins => \@tbl_rels,
where => \%where,
group => \@group,
sort => \@sort,
); # p $sql; p \@bind;
if ( my @data = $dbix->query($sql, @bind)->hashes ) { # p \@data;
update_request_tables(@data);
}
}
=begin # using completed_sections instead
{ # haematology table (join too complex for SQLA):
my $sql = q!SELECT r.id as 'requestId', rh.* FROM hilis4.request_haematology
rh JOIN outreach_patient_access.requests r on rh.lab_number
= CONCAT(r.year - 2000, '_', LPAD(r.request_number, 5, 0))
WHERE rh.updated >= ?!;
if ( my @data = $dbix->query($sql, $ref_date->ymd)->hashes ) {
update_haematology_table(@data);
}
}
=cut
{ # completed_sections
my @col_names = qw(
r.id|request_id
COUNT(DISTINCT(t.description))|count
);
my @tbl_rels = qw(
outreach_patient_access.requests|r rr.request_id=r.id
outreach.request_results|rr rr.param_id=p.id
outreach.lab_params|p p.department_id=t.id
outreach.result_types|t
);
my %where = (
't.description' => { -in => ['haematology', 'flow_cytometry'] },
);
my @group = 'r.id';
my @sort = 'r.id';
my ($sql, @bind) = get_query_params(
cols => \@col_names,
joins => \@tbl_rels,
where => \%where,
group => \@group,
sort => \@sort,
); # p $sql; p \@bind;#
if ( my @data = $dbix->query($sql, @bind)->hashes ) { # p \@data;
update_lab_sections_data_table(@data);
}
}
# disabled (day > 31) - is optimize tables necessary ?
if ( $ref_date->day > 31 ) { # optimize tables after multiple delete/insert:
my @tables = $dbix->query('SHOW TABLES')->column; # p \@tables;
# do { my $cols = $dbix->get_cols($_); p $cols; } for @tables;
$dbix->dbh->do('OPTIMIZE TABLE ' . $_) for @tables;
}
printf "%s: outreach patient access tables updated successfully\n",
$ref_date->clone->add(days =>1)->ymd;
#===============================================================================
sub update_patient_tables {
my @data = @_; # p \@data;
# update patient table:
{
my @dataset = do {
my @cols = qw/patientId initials dob gender updated_at/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'patients';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# update patient_case table:
{
my @dataset = do {
my @cols = qw/pcId patientId refSrcId pcTime/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'patient_case';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# update patient_demographics table:
{
my @dataset = do {
my @cols = qw/patientId address post_code contact_number gp_id
practice_id pdTime/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'patient_demographics';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# update referral_sources table:
{
my @dataset = do {
my @cols = qw/practiceId practiceName practiceCode/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'referral_sources';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# update referrers table:
{
my @dataset = do {
my @cols = qw/gpId gpName gpCode/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'referrers';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# alternative to multi-value insert, do conventional inserts:
# $dbix->query("REPLACE INTO $db_name.patients VALUES (??)", @$_) for @data;
}
#===============================================================================
sub update_request_tables {
my @data = @_; # p \@data;
# update requests table:
{
my @dataset = do{
my @cols = qw/requestId request_number year patient_case_id
status_option_id registered updated_at/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'requests';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# update request_followup table:
{
my @dataset = do {
my @cols = qw/requestId followup_option_id rfTime/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'request_followup';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
# update request_pack_dispatch table:
{
my @dataset = do {
my @cols = qw/requestId pack_due pack_sent return_due pdTime/;
map { [ @$_{@cols} ] } @data;
}; # p \@set;
my $tbl = join '.', $db_name, 'request_pack_dispatch';
my $cols = $dbix->get_cols($tbl);
my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
}
#===============================================================================
sub update_lab_sections_data_table {
my @dataset = @_; # p \@data;
my $tbl = join '.', $db_name, 'lab_sections_data';
my $cols = $dbix->get_cols($tbl);
my @odku = qw(count); # list of cols for on duplicate key update list
my ($stmt, @bind)
= $sqla->update_or_insert_multi($tbl, $cols, \@dataset, \@odku); # p [$stmt, @bind];
$dbix->query($stmt, @bind);
}
#===============================================================================
sub get_query_params {
my %h = @_; # p \%h;
my @params = ( -columns => $h{cols} );
push @params, ( -where => $h{where} ) if $h{where};
push @params, ( -group_by => $h{group} ) if $h{group};
push @params, ( -order_by => $h{sort} ) if $h{sort};
# expects joins OR tbls:
if ( my $joins = $h{joins} ) {
push @params, ( -from => [ -join => @$joins ] );
}
elsif ( my $tables = $h{tbls} ) {
push @params, ( -from => $tables );
}
else { die "require join relationships or list of tables" }
my ($sql, @bind) = $sqla->select(@params); # p $sql; p \@bind;
return ($sql, @bind);
}
__DATA__
# create patients table ========================================================
USE hilis4;
CREATE TABLE IF NOT EXISTS outreach_patient_access.patients (
`id` INT(11),
`initials` CHAR(2),
`dob` DATE,
`gender` ENUM('M','F','U'),
`time` TIMESTAMP,
PRIMARY KEY (`id`)
)
ENGINE=InnoDB;
# get all outreach patients:
REPLACE INTO outreach_patient_access.patients
SELECT p.id,
UPPER( CONCAT( LEFT(p.first_name, 1), LEFT(p.last_name, 1) ) ) as initials,
p.dob, p.gender, p.updated_at
FROM requests r
JOIN ( patient_case pc JOIN patients p on pc.patient_id = p.id )
on r.patient_case_id = pc.id
JOIN patient_demographics pd on pd.patient_id = p.id
JOIN ( request_initial_screen ris JOIN screens s on ris.screen_id = s.id )
on ris.request_id = r.id
WHERE s.description IN ('Outreach','Community monitoring')
and pd.`status` = 'alive'
GROUP BY p.id
ORDER BY p.id;
# create patient_case table ========================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.patient_case (
`id` INT(11),
`patient_id` INT(11),
`referral_source_id` SMALLINT(6),
`time` TIMESTAMP,
PRIMARY KEY (`id`),
INDEX `patient_id` (`patient_id`),
INDEX `referral_source_id` (`referral_source_id`)
)
ENGINE=InnoDB;
# get all outreach patient_case:
REPLACE INTO outreach_patient_access.patient_case
SELECT pc.id, pc.patient_id, pc.referral_source_id, pc.time
FROM hilis4.patient_case pc
JOIN outreach_patient_access.patients p on pc.patient_id = p.id;
# create patient_access table ========================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.patient_access (
`patient_id` INT(11) UNSIGNED NOT NULL,
`email` VARCHAR(255) NOT NULL,
`password` VARCHAR(255) NOT NULL,
`last_login` DATETIME NOT NULL DEFAULT '1900-01-01 00:00:00'
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.patient_access(patient_id, email, password)
SELECT p.id, CONCAT('user', p.id, '@email.net'), 'aiBPCXYMIAaO5v0SN405sSlwaco' /* outreach */
FROM outreach_patient_access.patients p;
# create patient_demographics table ========================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.patient_demographics (
`patient_id` INT(11),
`address` VARCHAR(255),
`post_code` VARCHAR(8),
`contact_number` VARCHAR(15),
`gp_id` INT(11),
`practice_id` SMALLINT(6),
`time` TIMESTAMP,
PRIMARY KEY (`patient_id`),
INDEX `practice_id` (`practice_id`),
INDEX `gp_` (`gp_id`)
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.patient_demographics
SELECT pd.patient_id, pd.address, pd.post_code, pd.contact_number, pd.gp_id,
pd.practice_id, pd.time
FROM hilis4.patient_demographics pd JOIN outreach_patient_access.patients p
on pd.patient_id = p.id;
# create requests table ========================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.requests (
`id` INT(11),
`request_number` INT(11),
`year` YEAR,
`patient_case_id` INT(11),
`status_option_id` SMALLINT(6),
`registered` TIMESTAMP,
`time` TIMESTAMP,
PRIMARY KEY (`id`),
INDEX `patient_case_id` (`patient_case_id`),
INDEX `status_option_id` (`status_option_id`)
)
ENGINE=InnoDB;
# get all outreach requests:
REPLACE INTO outreach_patient_access.requests
SELECT r.id, r.request_number, r.year, r.patient_case_id, r.status_option_id,
r.created_at, r.updated_at
FROM requests r
JOIN ( patient_case pc JOIN patients p on pc.patient_id = p.id )
on r.patient_case_id = pc.id
JOIN patient_demographics pd on pd.patient_id = p.id
JOIN ( request_initial_screen ris join screens s on ris.screen_id = s.id )
on ris.request_id = r.id
WHERE s.description IN ('Outreach','Community monitoring')
and pd.`status` = 'alive'
ORDER BY r.id;
# create result_types table ========================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.result_types (
`id` smallint(6),
`description` varchar(25),
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.result_types (`id`, `description`) VALUES
(1, 'immunology'),
(2, 'haematology'),
(3, 'biochemistry'),
(4, 'flow_cytometry');
# create lab_params table =================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.lab_params (
`id` SMALLINT(6),
`param_name` VARCHAR(25),
`field_label` VARCHAR(255),
`field_type` ENUM('menu','int','decimal'),
`department_id` SMALLINT(6),
PRIMARY KEY (`id`),
UNIQUE INDEX `param_name` (`param_name`),
INDEX `department_id` (`department_id`)
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.lab_params
SELECT * FROM outreach.lab_params;
# create request_results table =================================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.request_results (
`request_id` INT(11),
`param_id` SMALLINT(6),
`result` VARCHAR(255),
PRIMARY KEY (`request_id`, `param_id`),
INDEX `request_results_ibfk_1` (`param_id`)
)
ENGINE=InnoDB;
# create request_pack_dispatch table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.request_pack_dispatch (
`request_id` INT(11),
`pack_due` DATE,
`pack_sent` DATE,
`return_due` DATE,
`time` TIMESTAMP,
PRIMARY KEY (`request_id`)
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.request_pack_dispatch
SELECT * FROM outreach.request_pack_dispatch;
# create request_followup table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.request_followup (
`request_id` INT(11),
`followup_option_id` SMALLINT(6),
`time` TIMESTAMP,
PRIMARY KEY (`request_id`),
INDEX `followup_option_id` (`followup_option_id`)
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.request_followup
SELECT * FROM outreach.request_followup;
# create status_options table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.status_options (
`id` smallint(6),
`description` varchar(255),
PRIMARY KEY (`id`),
UNIQUE KEY `description` (`description`)
) ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.status_options (`id`, `description`) VALUES
(4, 'authorised'),
(5, 'complete'),
(1, 'new'),
(3, 'reported'),
(2, 'screened');
# create followup_options table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.followup_options (
`id` smallint(6),
`option` varchar(20),
`label` varchar(25),
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.followup_options (`id`, `option`, `label`) VALUES
(1, 'zero_month', 'immediately'),
(2, 'one_month', 'one month'),
(3, 'three_month', 'three months'),
(4, 'six_month', 'six months'),
(5, 'twelve_month', 'twelve months'),
(6, 'clinic_return', 'return to clinic'),
(7, 'dead', 'dead'),
(8, 'other', 'other'),
(9, 'twenty_four_month', 'two years'),
(10, 'sixty_month', 'five years');
# create referral_sources table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.referral_sources (
`id` SMALLINT(6),
`display_name` VARCHAR(255),
`organisation_code` VARCHAR(6),
PRIMARY KEY (`id`),
UNIQUE INDEX `organisation_code` (`organisation_code`),
UNIQUE INDEX `display_name` (`display_name`)
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.referral_sources
SELECT rs.id, rs.display_name, rs.organisation_code
FROM hilis4.referral_sources rs
WHERE rs.id IN (
SELECT DISTINCT(practice_id) FROM outreach_patient_access.patient_demographics
);
# create referrers table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.referrers (
`id` INT(11),
`name` VARCHAR(255),
`national_code` VARCHAR(8),
PRIMARY KEY (`id`),
UNIQUE INDEX `national_code` (`national_code`),
INDEX `name` (`name`)
)
ENGINE=InnoDB;
REPLACE INTO outreach_patient_access.referrers
SELECT r.id, r.name, r.national_code
FROM hilis4.referrers r
WHERE r.id IN (
SELECT DISTINCT(gp_id) FROM outreach_patient_access.patient_demographics
);
# lab_sections_data table ===========================================
CREATE TABLE IF NOT EXISTS outreach_patient_access.lab_sections_data (
`request_id` INT(11),
`count` SMALLINT,
`time` TIMESTAMP NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP,
PRIMARY KEY (`request_id`)
)
ENGINE=InnoDB;
INSERT INTO outreach_patient_access.lab_sections_data(request_id, count)
SELECT
r.id, COUNT(DISTINCT(t.description)) as 'count'
# r.id, GROUP_CONCAT(DISTINCT(t.description))
FROM outreach_patient_access.requests r
LEFT JOIN ( outreach.request_results rr
JOIN ( outreach.lab_params p
JOIN outreach.result_types t on p.department_id = t.id )
on rr.param_id = p.id ) on rr.request_id = r.id
WHERE t.description in ('haematology', 'flow_cytometry')
GROUP BY r.id;