#!/usr/bin/perl

# Summarise VCF records that are unique to a single file and chart them.
#
# Usage: <this script> <vcf file identifier>
#
# Every sub-directory of "<script dir>/vcf" is scanned for files whose name
# begins with the supplied identifier. Each matching file is parsed, and the
# records are indexed by start position. Positions that were seen in exactly
# one file are printed as a text table and their depth (x) / VF (y) values
# are handed to ChartMaker::make_chart().

use warnings;
use strict;

use lib '/home/raj/perl5/lib/perl5';
use IO::All;
use FindBin;
use Data::Dumper;    # only needed for ad-hoc debugging (e.g. warn Dumper \%h)

use lib $FindBin::Bin . '/../lib';
use ChartMaker;

my $src = $ARGV[0];
unless ( defined $src && length $src ) {
    # usage error: report on STDERR and signal failure to the caller
    print STDERR "need a vcf file identifier !!\n\n";
    exit 1;
}

my $src_dir = $FindBin::Bin . '/vcf';

my @dirs = io($src_dir)->all;

# start-position indexer, filled in by parse():
#   $h{$start_pos}{data}  => arrayref of record hashrefs (ref/alt/filter/depth/vf)
#   $h{$start_pos}{count} => number of files in which the position was seen
my %h;

# data structure for chartdirector:
my %chart = ( data_title => $src );

# extract files from each dir, skip all except matching supplied filename:
for my $dir (@dirs) {
    next unless $dir->type eq 'dir';

    for my $file ( io($dir)->all ) {
        # a nested directory cannot be slurped as a VCF file
        next if $file->type eq 'dir';

        # \Q..\E: the identifier is a literal name prefix, not a pattern
        next unless $file->filename =~ /^\Q$src\E/;

        parse( $file->name );
    }
}

# alternative ad-hoc report (positions ordered by how many files share them):
# printf "| %9s | %s |\n", $_, $h{$_}{count} for sort by_frequency keys %h;

my $format = "| %9s | %6s | %8s | %10s | %4s | %6s |\n";
my @dividr = ( '=' x 9, '=' x 6, '=' x 8, '=' x 10, '=' x 4, '=' x 6 );

printf $format, qw( START REF ALT FILTER DP VF );
printf $format, @dividr;

for my $location ( sort by_start_pos keys %h ) {
    # only report positions that occur in exactly one file
    next unless $h{$location}{count} == 1;

    for my $record ( @{ $h{$location}{data} } ) {
        push @{ $chart{x_data} }, $record->{depth};
        push @{ $chart{y_data} }, $record->{vf};

        # undefined values are shown as empty cells (same output, no
        # "uninitialized value" warnings)
        my @cells = map { defined $_ ? $_ : '' }
            @{$record}{qw/ref alt filter depth vf/};
        printf $format, $location, @cells;
    }
    printf $format, @dividr;
}

# generate chart of depth (x) vs VF (y):
@chart{qw/x_title y_title chart_title/} = ( 'Depth', 'VF', 'VCF analysis' );
ChartMaker::make_chart( \%chart );

exit 0;

#===============================================================================

# parse($filename)
#
# Reads one tab-separated VCF file and records every data line in the
# file-level %h index, keyed on the start position (2nd column):
#   - one hashref (filter, depth, ref, alt, vf) is appended to
#     $h{$start_pos}{data} per line, so a position can hold several entries;
#   - $h{$start_pos}{count} is incremented once per file, regardless of how
#     many times the position appears within that file.
# Lines starting with '#', lines with fewer than 8 columns and lines with a
# false start position are skipped. Returns nothing.
sub parse {
    my $filename = shift;

    my @lines = io($filename)->slurp;

    my %seen;    # start positions encountered in this file

    LINE:
    for my $line (@lines) {
        next LINE if $line =~ /^#/;    # skip comments

        # strip the line ending (LF or CRLF) so it cannot leak into the
        # last column
        $line =~ s/\r?\n\z//;

        my @fields = split /\t/, $line;

        # need at least the columns up to index 7 (read below for DP=)
        next LINE if @fields < 8;

        my $start_pos = $fields[1];
        next LINE unless $start_pos;

        my ( $ref, $alt ) = @fields[ 3, 4 ];
        my $filter = $fields[6];

        # depth is taken from a DP=<digits> entry in column 8; undef if absent
        my ($depth) = $fields[7] =~ /DP=(\d+)/;

        # VF is read as the 4th colon-separated value of the last column
        # NOTE(review): presumably the sample column with VF in 4th place -
        # confirm against the FORMAT column of the input files
        my $vf = ( split /:/, $fields[-1] )[3];

        my %r = (
            filter => $filter,
            depth  => $depth,
            ref    => $ref,
            alt    => $alt,
            vf     => $vf,
        );

        # add %r data as array(ref) - can have multiple entries per file:
        push @{ $h{$start_pos}{data} }, \%r;

        $seen{$start_pos}++;
    }

    # count each start position once per file (don't care how many times the
    # start position entry appears within the file):
    $h{$_}{count}++ for keys %seen;

    return;
}

# sort comparator: ascending numeric start position
sub by_start_pos {
    return $a <=> $b;
}

# sort comparator: 1st by descending file count, 2nd by ascending start
# position (only used by the commented-out ad-hoc report above)
sub by_frequency {
    return $h{$b}{count} <=> $h{$a}{count} || $a <=> $b;
}