use Modern::Perl; # extracts data from request_storage table for Syncona pilot project. Uses # hmrn_pilot_manifest spreadsheet as a template to populate required cols use Spreadsheet::WriteExcel::Simple; use Data::Printer; use FindBin; # warn $FindBin::Bin; use lib '/home/raj/apps/HILIS4/lib'; use LIMS::Local::ScriptHelpers; use LIMS::Local::Utils; my $tools = LIMS::Local::ScriptHelpers->new(); my $dbix = $tools->dbix(); $dbix->lc_columns = 0; # preserve col case my $out_file = "$FindBin::Bin/manifest.xls"; #=============================================================================== # use Spreadsheet::Read; # my $book = ReadData("$FindBin::Bin/hmrn_pilot_manifest.xls"); # p($book); exit; my $xl = Spreadsheet::WriteExcel::Simple->new; my $headers = header(); # p($headers); exit; $xl->write_bold_row($headers); # p(@headers); exit; my $sql = query(); my $result = $dbix->query($sql); my %h; # hashref of plateId / location => request_storage table data while ( my $ref = $result->hash ) { # p($ref); next; my $location = $ref->{location} or die p($ref); my $plateId = $ref->{plateId} or die p($ref); # remove leading zeros from fluidx-generated well to match syncona format (eg A2): $location = ( $1 . int $2 ) if $location =~ /([A-H])(\d+)/; # warn $location; $h{$plateId}{$location} = $ref; } # p(%h); exit; # get 1st 3 cols of syncona manifest (plateId, wellId, sampleId): my @sangar_cols = map { [ split "\t" ] } grep chomp, ; # p(@sangar_cols); exit; ROW: for my $row(@sangar_cols) { # $row = arrayref my ($plateId, $well, $sampleId) = @$row; # p($sampleId); # next ROW to skip empty, or do "if (my $ref = [...])" to preserve empty rows: my $ref = $h{$plateId}{$well} || next ROW; $ref->{lab_no} = sprintf '%s/%02d', $ref->{request_number}, $ref->{year} - 2000; # p $ref->{lab_no}; # only require SupplierSampleName, Gender, TaxonID & CommonName @{$row}[3, 7, 26, 27] = ( @{$ref}{qw(vialId gender)}, 9606, 'Homo Sapien' ); # additional fields for our use: @{$row}[11, 31, 32] = ( @{$ref}{ qw(source specimen_code lab_no) } ); $xl->write_row($row); =begin # don't need all these push @$row, ( $lab_ref, # sample name 'HMRN pilot', # cohort $ref->{volume}, $ref->{concentration}, $ref->{gender}, 'UK', 'Yorkshire', 'N/A', # ethnicity $ref->{source}, # DNA source $ref->{year}, # only need year of registration $ref->{stored}, # SHOULD BE DNA EXTRACTION DATE 'No', # control ? 'No', # resubmitted ? $ref->{method}, # of DNA extration 'No', # purified ? 'N/A', # purification method 'Promega', # concentration determination method '4C', # DNA storage temp 'N/A', # mother 'N/A', # father 'N/A', # sibling 'N/A', # GC content 'N/A', # public name 9606, # taxon ID 'Homo Sapien', # common name '??', # sample description 'N/A', # strain 'N/A', # sample visibility $ref->{specimen_code}, # sample type '??', # sample accession no ? vialID, request.id, lab_ref $sampleId, # sanger sample/donor ID 'N/A', # phenotype ); =cut } $xl->save( $out_file ); sub query { return q! select case when sr.plateId = '50050662' then 'DN349449Q' when sr.plateId = '50050675' then 'DN349450J' when sr.plateId = '50050670' then 'DN349448P' when sr.plateId = '50007214' then 'DN349451K' else sr.plateId end as 'plateId', rs.vialId, rs.vial_location as location, r.request_number, r.year, year(rs.created_at) as 'stored', s.sample_code as 'specimen_code', case when p.gender = 'U' then p.first_name else p.gender end as gender, rs.sample, rs.volume, rs.concentration, rs.source, rs.method, rv.diagnosis from request_storage rs join requests r on rs.request_id = r.id join ( patient_case pc join patients p on pc.patient_id = p.id ) on r.patient_case_id = pc.id join storage_racks sr on rs.rack_id = sr.id join specimens s on rs.specimen_id = s.id join authorised_reports_view rv on rv.id = r.id where signed_out is not null order by sr.id, rs.vial_location!; } sub header { my @rows = _header(); # insert line-breaks as per original spreadsheet format: for (@rows) { s/ /\n/g ; s/~/ /g }; # break on space, recover protected spaces return \@rows; } sub _header { return split "\n", <