#!/usr/bin/perl =begin * synchronises outreach data with external database every day (duration = 1d) * creates anonymised outreach_patient_access patient table * dump queries to stdout using '-q' * not necessary to use PK => FK mappings as tables are copied from source, also better NOT to use them if using REPLACE INTO due to significant overhead with indexes =cut BEGIN { use Getopt::Std; getopts('q'); # query output to stdout our $opt_q; $ENV{SQL_TRACE} = $opt_q; # warn $ENV{SQL_TRACE}; exit; } use lib ( '/home/raj/perl5/lib/perl5', '/home/raj/perl-lib', ); use Local::SQL::Abstract::Plugin::InsertMulti; # adds replace_multi() to SAPIM use Data::Printer alias => 'p', use_prototypes => 0; use SQL::Abstract::More; use Modern::Perl; use Local::Utils; use Path::Tiny; use Local::DB; use IO::All; # set some package vars (MySQL dates not DT objects & don't log queries): $Local::QueryLogger::NO_QUERY_LOGS = 1; # don't need queries in logs dir $Local::DBIx::Simple::Result::NO_AUTO_DATE_INFLATION = 1; # don't want DT objects my $db_name = 'outreach_patient_access'; my $dbix = Local::DB->dbix({ dbname => $db_name }); my $sqla = SQL::Abstract::More->new; my $ref_date = Local::Utils::today->subtract(days => 1); # p $ref_date->dmy; $dbix->lc_columns = 0; # preserve alias cases # patient-related tables ======================================================= { my @col_names = ( 'p.id|patientId', 'UPPER( CONCAT( LEFT(p.first_name, 1), LEFT(p.last_name, 1) ) )|initials', 'p.dob', 'p.gender', 'p.nhs_number', 'p.updated_at', 'pc.id|pcId', 'pc.referral_source_id|refSrcId', 'pc.time|pcTime', 'pd.address', 'pd.post_code', 'pd.contact_number', 'pd.gp_id', 'pd.practice_id', 'pd.time|pdTime', 'rs.id|practiceId', 'rs.display_name|practiceName', 'rs.organisation_code|practiceCode', 'gp.id|gpId', 'gp.name|gpName', 'gp.national_code|gpCode', ); my @tbl_rels = ( # table|alias # FK-PK relationship 'hilis4.requests|r' , 'r.patient_case_id = pc.id' , 'hilis4.patient_case|pc' , 'pc.patient_id = p.id' , 'hilis4.patients|p' , '=>pd.patient_id = p.id' , # left join 'hilis4.patient_demographics|pd' , 'pd.practice_id = rs.id' , 'hilis4.referral_sources|rs' , 'pd.gp_id = gp.id' , 'hilis4.referrers|gp' , 'ris.request_id = r.id' , 'hilis4.request_initial_screen|ris' , 'ris.screen_id = s.id' , 'hilis4.screens|s' ); my %where = ( 's.description' => { -in => ['Outreach','Community monitoring'] }, 'pd.status' => 'alive', '-or' => { 'r.created_at' => { '>=' => $ref_date->ymd }, 'p.updated_at' => { '>=' => $ref_date->ymd }, 'pc.time' => { '>=' => $ref_date->ymd }, 'pd.time' => { '>=' => $ref_date->ymd }, } ); my @group = 'p.id'; my @sort = 'p.id'; my ($sql, @bind) = get_query_params( cols => \@col_names, joins => \@tbl_rels, where => \%where, group => \@group, sort => \@sort, ); # p $sql; p \@bind; $dbix->dump_query($sql, @bind) if $opt_q; # exit; if ( my @data = $dbix->query($sql, @bind)->hashes ) { # p \@data; update_patient_tables(@data); } } # request-related tables ======================================================= { my @col_names = qw( r.id|requestId r.request_number r.year r.patient_case_id r.status_option_id r.created_at|registered r.updated_at rf.followup_option_id rf.time|rfTime pd.pack_due pd.pack_sent pd.return_due pd.time|pdTime ); my @tbl_rels = ( # table|alias # FK-PK relationship 'hilis4.requests|r' , 'ris.request_id = r.id' , 'hilis4.request_initial_screen|ris' , 'ris.screen_id = s.id' , 'hilis4.screens|s' , '=>rf.request_id = r.id' , 'outreach.request_followup|rf' , '=>rf.followup_option_id = o.id' , 'outreach.followup_options|o' , '=>pd.request_id = r.id' , 'outreach.request_pack_dispatch|pd' ); my %where = ( 's.description' => { -in => ['Outreach','Community monitoring'] }, '-or' => { 'r.updated_at' => { '>=' => $ref_date->ymd }, 'r.created_at' => { '>=' => $ref_date->ymd }, 'rf.time' => { '>=' => $ref_date->ymd }, 'pd.time' => { '>=' => $ref_date->ymd }, } ); my @group = 'r.id'; my @sort = 'r.id'; my ($sql, @bind) = get_query_params( cols => \@col_names, joins => \@tbl_rels, where => \%where, group => \@group, sort => \@sort, ); # p $sql; p \@bind; if ( my @data = $dbix->query($sql, @bind)->hashes ) { # p \@data; update_request_tables(@data); } } =begin # using completed_sections instead { # haematology table (join too complex for SQLA): my $sql = q!SELECT r.id as 'requestId', rh.* FROM hilis4.request_haematology rh JOIN outreach_patient_access.requests r on rh.lab_number = CONCAT(r.year - 2000, '_', LPAD(r.request_number, 5, 0)) WHERE rh.updated >= ?!; if ( my @data = $dbix->query($sql, $ref_date->ymd)->hashes ) { update_haematology_table(@data); } } =cut { # completed_sections my @col_names = qw( r.id|request_id COUNT(DISTINCT(t.description))|count ); my @tbl_rels = qw( outreach_patient_access.requests|r rr.request_id=r.id outreach.request_results|rr rr.param_id=p.id outreach.lab_params|p p.department_id=t.id outreach.result_types|t ); my %where = ( 't.description' => { -in => ['haematology', 'flow_cytometry'] }, ); my @group = 'r.id'; my @sort = 'r.id'; my ($sql, @bind) = get_query_params( cols => \@col_names, joins => \@tbl_rels, where => \%where, group => \@group, sort => \@sort, ); # p $sql; p \@bind; if ( my @data = $dbix->query($sql, @bind)->hashes ) { # p \@data; update_lab_sections_data_table(@data); } } # disabled (day > 31) - is optimize tables necessary ? if ( $ref_date->day > 31 ) { # optimize tables after multiple delete/insert: my @tables = $dbix->query('SHOW TABLES')->column; # p \@tables; # do { my $cols = $dbix->get_cols($_); p $cols; } for @tables; $dbix->dbh->do('OPTIMIZE TABLE ' . $_) for @tables; } printf "%s: outreach patient access tables updated successfully\n", $ref_date->clone->add(days =>1)->ymd; #=============================================================================== sub update_patient_tables { my @data = @_; # p \@data; # update patient table: { my @dataset = do { my @cols = qw/patientId initials dob gender nhs_number updated_at/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'patients'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update patient_case table: { my @dataset = do { my @cols = qw/pcId patientId refSrcId pcTime/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'patient_case'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update patient_access table: { my @dataset = do { map { [ $_->{patientId}, 'user' . $_->{patientId} . '@email.net', 'aiBPCXYMIAaO5v0SN405sSlwaco', # password (outreach) ] } @data; }; # p \@dataset; my $tbl = join '.', $db_name, 'patient_access'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update patient_demographics table: { my @dataset = do { my @cols = qw/patientId address post_code contact_number gp_id practice_id pdTime/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'patient_demographics'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update referral_sources table: { my @dataset = do { my @cols = qw/practiceId practiceName practiceCode/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'referral_sources'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update referrers table: { my @dataset = do { my @cols = qw/gpId gpName gpCode/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'referrers'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # alternative to multi-value insert, do conventional inserts: # $dbix->query("REPLACE INTO $db_name.patients VALUES (??)", @$_) for @data; } #=============================================================================== sub update_request_tables { my @data = @_; # p \@data; # update requests table: { my @dataset = do{ my @cols = qw/requestId request_number year patient_case_id status_option_id registered updated_at/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'requests'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update request_followup table: { my @dataset = do { my @cols = qw/requestId followup_option_id rfTime/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'request_followup'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } # update request_pack_dispatch table: { my @dataset = do { my @cols = qw/requestId pack_due pack_sent return_due pdTime/; map { [ @$_{@cols} ] } @data; }; # p \@set; my $tbl = join '.', $db_name, 'request_pack_dispatch'; my $cols = $dbix->get_cols($tbl); my ($stmt, @bind) = $sqla->replace_multi($tbl, $cols, \@dataset); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } } #=============================================================================== sub update_lab_sections_data_table { my @dataset = @_; # p \@data; my $tbl = join '.', $db_name, 'lab_sections_data'; my $cols = $dbix->get_cols($tbl); my @odku = qw(count); # list of cols for on duplicate key update list my ($stmt, @bind) = $sqla->update_or_insert_multi($tbl, $cols, \@dataset, \@odku); # p [$stmt, @bind]; $dbix->query($stmt, @bind); } #=============================================================================== sub get_query_params { my %h = @_; # p \%h; my @params = ( -columns => $h{cols} ); push @params, ( -where => $h{where} ) if $h{where}; push @params, ( -group_by => $h{group} ) if $h{group}; push @params, ( -order_by => $h{sort} ) if $h{sort}; # expects joins OR tbls: if ( my $joins = $h{joins} ) { push @params, ( -from => [ -join => @$joins ] ); } elsif ( my $tables = $h{tbls} ) { push @params, ( -from => $tables ); } else { die "require join relationships or list of tables" } my ($sql, @bind) = $sqla->select(@params); # p $sql; p \@bind; return ($sql, @bind); } __DATA__ # create patients table ======================================================== USE hilis4; CREATE TABLE IF NOT EXISTS outreach_patient_access.patients ( `id` INT(11), `initials` CHAR(2), `dob` DATE, `gender` ENUM('M','F','U'), `nhs_number` BIGINT(10), `time` TIMESTAMP, PRIMARY KEY (`id`) ) ENGINE=InnoDB; # get all outreach patients: REPLACE INTO outreach_patient_access.patients SELECT p.id, UPPER( CONCAT( LEFT(p.first_name, 1), LEFT(p.last_name, 1) ) ) as initials, p.dob, p.gender, p.nhs_number, p.updated_at FROM requests r JOIN ( patient_case pc JOIN patients p on pc.patient_id = p.id ) on r.patient_case_id = pc.id JOIN patient_demographics pd on pd.patient_id = p.id JOIN ( request_initial_screen ris JOIN screens s on ris.screen_id = s.id ) on ris.request_id = r.id WHERE s.description IN ('Outreach','Community monitoring') and pd.`status` = 'alive' GROUP BY p.id ORDER BY p.id; # create patient_case table ======================================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.patient_case ( `id` INT(11), `patient_id` INT(11), `referral_source_id` SMALLINT(6), `time` TIMESTAMP, PRIMARY KEY (`id`), INDEX `patient_id` (`patient_id`), INDEX `referral_source_id` (`referral_source_id`) ) ENGINE=InnoDB; # get all outreach patient_case: REPLACE INTO outreach_patient_access.patient_case SELECT pc.id, pc.patient_id, pc.referral_source_id, pc.time FROM hilis4.patient_case pc JOIN outreach_patient_access.patients p on pc.patient_id = p.id; # create patient_access table ======================================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.patient_access ( `patient_id` INT(11) UNSIGNED NOT NULL, `email` VARCHAR(255) NOT NULL, `password` VARCHAR(255) NOT NULL, `last_login` DATETIME NOT NULL DEFAULT '1900-01-01 00:00:00' ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.patient_access(patient_id, email, password) SELECT p.id, CONCAT('user', p.id, '@email.net'), 'aiBPCXYMIAaO5v0SN405sSlwaco' /* outreach */ FROM outreach_patient_access.patients p; # create patient_demographics table ======================================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.patient_demographics ( `patient_id` INT(11), `address` VARCHAR(255), `post_code` VARCHAR(8), `contact_number` VARCHAR(15), `gp_id` INT(11), `practice_id` SMALLINT(6), `time` TIMESTAMP, PRIMARY KEY (`patient_id`), INDEX `practice_id` (`practice_id`), INDEX `gp_` (`gp_id`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.patient_demographics SELECT pd.patient_id, pd.address, pd.post_code, pd.contact_number, pd.gp_id, pd.practice_id, pd.time FROM hilis4.patient_demographics pd JOIN outreach_patient_access.patients p on pd.patient_id = p.id; # create requests table ======================================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.requests ( `id` INT(11), `request_number` INT(11), `year` YEAR, `patient_case_id` INT(11), `status_option_id` SMALLINT(6), `registered` TIMESTAMP, `time` TIMESTAMP, PRIMARY KEY (`id`), INDEX `patient_case_id` (`patient_case_id`), INDEX `status_option_id` (`status_option_id`) ) ENGINE=InnoDB; # get all outreach requests: REPLACE INTO outreach_patient_access.requests SELECT r.id, r.request_number, r.year, r.patient_case_id, r.status_option_id, r.created_at, r.updated_at FROM requests r JOIN ( patient_case pc JOIN patients p on pc.patient_id = p.id ) on r.patient_case_id = pc.id JOIN patient_demographics pd on pd.patient_id = p.id JOIN ( request_initial_screen ris join screens s on ris.screen_id = s.id ) on ris.request_id = r.id WHERE s.description IN ('Outreach','Community monitoring') and pd.`status` = 'alive' ORDER BY r.id; # create result_types table ======================================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.result_types ( `id` smallint(6), `description` varchar(25), PRIMARY KEY (`id`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.result_types (`id`, `description`) VALUES (1, 'immunology'), (2, 'haematology'), (3, 'biochemistry'), (4, 'flow_cytometry'); # create lab_params table ================================================= CREATE TABLE IF NOT EXISTS outreach_patient_access.lab_params ( `id` SMALLINT(6), `param_name` VARCHAR(25), `field_label` VARCHAR(255), `field_type` ENUM('menu','int','decimal'), `department_id` SMALLINT(6), PRIMARY KEY (`id`), UNIQUE INDEX `param_name` (`param_name`), INDEX `department_id` (`department_id`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.lab_params SELECT * FROM outreach.lab_params; # create request_results table ================================================= CREATE TABLE IF NOT EXISTS outreach_patient_access.request_results ( `request_id` INT(11), `param_id` SMALLINT(6), `result` VARCHAR(255), PRIMARY KEY (`request_id`, `param_id`), INDEX `request_results_ibfk_1` (`param_id`) ) ENGINE=InnoDB; # create request_pack_dispatch table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.request_pack_dispatch ( `request_id` INT(11), `pack_due` DATE, `pack_sent` DATE, `return_due` DATE, `time` TIMESTAMP, PRIMARY KEY (`request_id`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.request_pack_dispatch SELECT * FROM outreach.request_pack_dispatch; # create request_followup table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.request_followup ( `request_id` INT(11), `followup_option_id` SMALLINT(6), `time` TIMESTAMP, PRIMARY KEY (`request_id`), INDEX `followup_option_id` (`followup_option_id`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.request_followup SELECT * FROM outreach.request_followup; # create status_options table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.status_options ( `id` smallint(6), `description` varchar(255), PRIMARY KEY (`id`), UNIQUE KEY `description` (`description`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.status_options (`id`, `description`) VALUES (4, 'authorised'), (5, 'complete'), (1, 'new'), (3, 'reported'), (2, 'screened'); # create followup_options table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.followup_options ( `id` smallint(6), `option` varchar(20), `label` varchar(25), PRIMARY KEY (`id`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.followup_options (`id`, `option`, `label`) VALUES (1, 'zero_month', 'immediately'), (2, 'one_month', 'one month'), (3, 'three_month', 'three months'), (4, 'six_month', 'six months'), (5, 'twelve_month', 'twelve months'), (6, 'clinic_return', 'return to clinic'), (7, 'dead', 'dead'), (8, 'other', 'other'), (9, 'twenty_four_month', 'two years'), (10, 'sixty_month', 'five years'), (11, 'two_month', 'two months'), (12, 'six_week', 'six weeks'); # create referral_sources table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.referral_sources ( `id` SMALLINT(6), `display_name` VARCHAR(255), `organisation_code` VARCHAR(6), PRIMARY KEY (`id`), UNIQUE INDEX `organisation_code` (`organisation_code`), UNIQUE INDEX `display_name` (`display_name`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.referral_sources SELECT rs.id, rs.display_name, rs.organisation_code FROM hilis4.referral_sources rs WHERE rs.id IN ( SELECT DISTINCT(practice_id) FROM outreach_patient_access.patient_demographics ); # create referrers table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.referrers ( `id` INT(11), `name` VARCHAR(255), `national_code` VARCHAR(8), PRIMARY KEY (`id`), UNIQUE INDEX `national_code` (`national_code`), INDEX `name` (`name`) ) ENGINE=InnoDB; REPLACE INTO outreach_patient_access.referrers SELECT r.id, r.name, r.national_code FROM hilis4.referrers r WHERE r.id IN ( SELECT DISTINCT(gp_id) FROM outreach_patient_access.patient_demographics ); # lab_sections_data table =========================================== CREATE TABLE IF NOT EXISTS outreach_patient_access.lab_sections_data ( `request_id` INT(11), `count` SMALLINT, `time` TIMESTAMP NOT NULL default CURRENT_TIMESTAMP on update CURRENT_TIMESTAMP, PRIMARY KEY (`request_id`) ) ENGINE=InnoDB; INSERT INTO outreach_patient_access.lab_sections_data(request_id, count) SELECT r.id, COUNT(DISTINCT(t.description)) as 'count' # r.id, GROUP_CONCAT(DISTINCT(t.description)) FROM outreach_patient_access.requests r LEFT JOIN ( outreach.request_results rr JOIN ( outreach.lab_params p JOIN outreach.result_types t on p.department_id = t.id ) on rr.param_id = p.id ) on rr.request_id = r.id WHERE t.description in ('haematology', 'flow_cytometry') GROUP BY r.id;