#!/usr/bin/perl

# Compares two pipe-delimited data files record by record.
#
# Inputs (both must already exist):
#   /tmp/hmds.dat      - the file under test
#   /tmp/hmds.dat.cmp  - the file it is compared against
#
# Outputs (all three are overwritten on every run):
#   /tmp/dat-file-diffs.txt - for every record key found in both files whose
#                             fields are not identical: the key, followed by
#                             a Data::Dumper dump of the field values that
#                             occur in only one of the two versions
#   /tmp/hmds.dat.a         - hmds.dat sorted with by_num_name_specimen
#   /tmp/hmds.dat.b         - hmds.dat.cmp sorted the same way, so the two
#                             files line up when opened in WinMerge
#
# Record keys that exist in only one of the two files are reported with warn.

use strict;
use warnings;

use IO::All;
use Data::Dumper;

my $DIFF_FILE = '/tmp/dat-file-diffs.txt';

# hash tables of labno+sample_type => data for each file
my %cmp  = load_records( '/tmp/hmds.dat.cmp', '/tmp/hmds.dat.b' );
my %test = load_records( '/tmp/hmds.dat',     '/tmp/hmds.dat.a' );

# compare %test & %cmp
open my $fh, '>', $DIFF_FILE or die "Cannot write $DIFF_FILE: $!";

# Sorted keys keep the report in the same order from one run to the next.
for my $ref ( sort keys %cmp ) {
    unless ( exists $test{$ref} ) {
        warn "$ref doesn't exist in hmds.dat";
        next;
    }

    my @test = @{ $test{$ref} };
    my @cmp  = @{ $cmp{$ref} };
    next if ( join '~', @test ) eq ( join '~', @cmp );

    my @difference = symmetric_difference( \@cmp, \@test );

    print {$fh} $ref, ":\n";
    print {$fh} Dumper \@difference;
}

# Buffered write errors only surface at close, so check it.
close $fh or die "Cannot close $DIFF_FILE: $!";

# Report the opposite direction too: records only present in hmds.dat.
for my $ref ( sort keys %test ) {
    warn "$ref doesn't exist in hmds.dat.cmp" unless exists $cmp{$ref};
}

# load_records($source, $sorted_copy)
#
# Reads $source, writes its lines sorted with by_num_name_specimen to
# $sorted_copy (replacing any previous content) and returns a hash that maps
# "<field 1>~<field 11>" to an array ref of the line's pipe-separated fields.
# When two lines share a key, the later line wins.
sub load_records {
    my ( $source, $sorted_copy ) = @_;

    my @lines = io($source)->slurp;
    chomp @lines;

    my %record_for;
    for my $line (@lines) {
        my @fields = split /\|/, $line;
        my $key = join '~', field( \@fields, 1 ), field( \@fields, 11 );
        $record_for{$key} = \@fields;
    }

    # Truncate rather than append, otherwise every re-run would stack another
    # sorted copy on top of the previous one.
    open my $out, '>', $sorted_copy
        or die "Cannot write $sorted_copy: $!";
    print {$out} "$_\n" for sort by_num_name_specimen @lines;
    close $out or die "Cannot close $sorted_copy: $!";

    return %record_for;
}

# symmetric_difference(\@left, \@right)
#
# Returns, sorted, the distinct values that occur in exactly one of the two
# arrays. Membership is tracked per array, so a value repeated inside one
# array is not mistaken for a value common to both.
# http://perldoc.perl.org/perlfaq4.html#How-do-I-compute-the-difference-of-two-arrays
sub symmetric_difference {
    my ( $left, $right ) = @_;

    my ( %in_left, %in_right );
    @in_left{@$left}   = ();
    @in_right{@$right} = ();

    my @only_left  = grep { !exists $in_right{$_} } keys %in_left;
    my @only_right = grep { !exists $in_left{$_} } keys %in_right;

    my @difference = sort @only_left, @only_right;
    return @difference;
}

# field(\@fields, $index)
#
# Returns the field at $index, or '' when the line has no such field (split
# drops trailing empty fields, so short lines are possible).
sub field {
    my ( $fields, $index ) = @_;
    return defined $fields->[$index] ? $fields->[$index] : '';
}

# request_number(\@fields)
#
# Returns the digits captured from field 1 by the H<digits>/<2 digits>
# pattern, or 0 when field 1 does not match.
sub request_number {
    my ($fields) = @_;
    my ($num) = field( $fields, 1 ) =~ m!H(\d+)/\d{2}!;
    return defined $num ? $num : 0;
}

# Sort comparator (reads the $a / $b sort globals, each one raw line).
# Missing fields compare as 0 / '' instead of raising "uninitialized" warnings.
sub by_num_name_specimen {
    my @aa = split /\|/, $a;
    my @bb = split /\|/, $b;

    return request_number( \@aa ) <=> request_number( \@bb )    # request number
        || field( \@aa, 2 ) cmp field( \@bb, 2 )                # last name
        || field( \@aa, 3 ) cmp field( \@bb, 3 )                # first name
        || field( \@aa, 11 ) cmp field( \@bb, 11 );             # specimen
}