P3X GitList

#!/usr/bin/perl

# expects hmds.dat & hmds.dat.cmp in /tmp - generates /tmp/dat-file-diffs.txt
# also generates /tmp/hmds.dat.a & /tmp/hmds.dat.b sorted in same order for WinMerge

use strict;
use warnings;

use IO::All;
use Data::Dumper;

my (%cmp, %test);

# Load both pipe-delimited data files into lookup tables. As a side effect each
# call writes a sorted copy of its input, so that the two copies line up when
# they are opened side by side in WinMerge:
#   /tmp/hmds.dat.cmp  ->  %cmp   +  /tmp/hmds.dat.b
#   /tmp/hmds.dat      ->  %test  +  /tmp/hmds.dat.a
%cmp  = index_and_export('/tmp/hmds.dat.cmp', '/tmp/hmds.dat.b');
%test = index_and_export('/tmp/hmds.dat',     '/tmp/hmds.dat.a');

# index_and_export($source, $sorted_copy)
#
# Reads every line of $source, splits it on '|' and returns a hash (as a flat
# list) mapping "<field 1>~<field 11>" (labno + sample type) to an array ref of
# that line's fields. When two lines produce the same key, the later one wins.
#
# Also writes the chomped input lines, ordered by by_num_name_specimen(), to
# $sorted_copy. The copy is opened with '>' so it is rebuilt from scratch on
# every run; appending would leave stale lines from earlier runs in the file
# and wreck the side-by-side comparison.
#
# Dies if $sorted_copy cannot be opened or closed.
sub index_and_export {
	my ($source, $sorted_copy) = @_;

	my @lines = io($source)->slurp;
	chomp @lines;

	my %record_for;
	for my $line (@lines) {
		my @fields = split /\|/, $line;
		$record_for{"$fields[1]~$fields[11]"} = \@fields;
	}

	open my $out, '>', $sorted_copy or die "cannot write $sorted_copy: $!";
	print {$out} "$_\n" for sort by_num_name_specimen @lines;
	close $out or die "cannot close $sorted_copy: $!";

	return %record_for;
}

# Compare %test against %cmp and write a report to /tmp/dat-file-diffs.txt.
# For every key of %cmp whose record differs from the record under the same key
# in %test, the report gets the key followed by a Dumper listing of the field
# values that occur on one side only. Keys of %cmp that are absent from %test
# are reported with warn() and skipped.
# NOTE(review): keys that exist only in %test are not reported at all - confirm
# that this one-way check is intended.
my $report_path = '/tmp/dat-file-diffs.txt';
open my $fh, '>', $report_path or die "cannot write $report_path: $!";

# Sorted keys, so the report comes out in the same order on every run.
for my $ref ( sort keys %cmp ) {
	unless ( exists $test{$ref} ) { warn "$ref doesn't exist in hmds.dat"; next; }

	my @test = @{ $test{$ref} };
	my @cmp  = @{ $cmp{$ref} };

	next if ( join '~', @test ) eq ( join '~', @cmp );

	# Record which side(s) each field value appears on. Plain occurrence
	# counting would treat a value that is repeated within ONE record as if
	# it were present in both records, and so hide it from the report.
	my %sides_of;
	$sides_of{$_}{cmp}  = 1 for @cmp;
	$sides_of{$_}{test} = 1 for @test;

	# A value is a difference when it was seen on exactly one side.
	# The comparison is by value, not by column: records that differ only in
	# the position of otherwise identical values produce an empty list.
	my @difference =
		grep { scalar( keys %{ $sides_of{$_} } ) == 1 } sort keys %sides_of;

	print $fh $ref, ":\n";
	print $fh Dumper \@difference;
}

# Buffered write errors only surface at close, so check it.
close $fh or die "cannot close $report_path: $!";

# Sort comparator for pipe-delimited lines (use as: sort by_num_name_specimen @lines).
# Reads the two lines from the sort globals $a and $b and orders them by:
#   1. the digits captured from field 1 by m!H(\d+)/\d{2}! (request number), numerically
#   2. field 2  (last name),  as a string
#   3. field 3  (first name), as a string
#   4. field 11 (specimen),   as a string
# Lines without a matching request number sort as number 0, and missing fields
# sort as the empty string. That is the same order undef would give, but
# without an "uninitialized value" warning on every comparison.
# Returns -1, 0 or 1.
sub by_num_name_specimen {
	my @keys;
	for my $line ($a, $b) {
		my @field = split /\|/, $line;

		# The request number stays undef when field 1 is missing or has no match.
		my ($num) = defined $field[1] ? $field[1] =~ m!H(\d+)/\d{2}! : ();
		my ($last, $first, $specimen) =
			map { defined $_ ? $_ : '' } @field[2, 3, 11];

		push @keys, [ ( defined $num ? $num : 0 ), $last, $first, $specimen ];
	}
	my ($ka, $kb) = @keys;

	return $ka->[0] <=> $kb->[0]	# request number
	    || $ka->[1] cmp $kb->[1]	# last name
	    || $ka->[2] cmp $kb->[2]	# first name
	    || $ka->[3] cmp $kb->[3];	# specimen
}