#!/usr/bin/perl use warnings; use strict; use lib '/home/raj/perl5/lib/perl5'; use lib '/home/raj/perl-lib/ChartDirector'; use IO::All; use FindBin; # warn $FindBin::Bin; use Data::Dumper; use perlchartdir; my $ref = $ARGV[0]; unless ($ref) { print "need a vcf identifier !!\n\n"; exit; } my $src_dir = $FindBin::Bin . '/vcf'; my @dirs = io($src_dir)->all; my %h; # start-position indexer my %chart; # extract files from each dir, skip all except matching supplied filename: for my $d (@dirs) { # warn Dumper $_; my @files = io($d)->all; # Dumper \@files; for my $f (@files) { # warn $f->filename; next unless $f->filename =~ /^$ref/; # warn $f->filename; parse($f->name); } } # warn Dumper \%h; exit; # printf "| %9s | %s |\n", $_, $h{$_}{count} for sort by_frequency keys %h; # map { print Dumper $h{$_} } grep { $h{$_}{count} == 1 } sort by_start_pos keys %h; my $format = "| %9s | %6s | %8s | %10s | %4s | %6s |\n"; my @dividr = ( '=' x 9, '=' x 6, '=' x 8, '=' x 10, '=' x 4, '=' x 6 ); printf $format, qw( START REF ALT FILTER DP VF ); printf $format, @dividr; for my $location ( sort by_start_pos keys %h ) { # warn $location; next unless $h{$location}{count} == 1; my $data = $h{$location}{data}; # warn Dumper $data; for (@$data) { push @{ $chart{x} }, $_->{depth}; push @{ $chart{y} }, $_->{vf}; } printf $format, $location, @{$_}{ qw/ref alt filter depth vf/ } for @$data; printf $format, @dividr; } make_chart(); sub parse { my $filename = shift; my @lines = io($filename)->slurp; # warn Dumper \@lines; my %i; # counter localised to individual file LINE: for (@lines) { next LINE if /^#/; # skip comments my @fields = split "\t", $_; my $start_pos = $fields[1] || next LINE; # warn $start_pos; my ($ref,$alt) = @fields[3,4]; # warn Dumper [$ref,$alt]; my $filter = $fields[6]; # warn $filter; my ($depth) = $fields[7] =~ /DP=(\d+)/; # warn $depth; my $results = $fields[-1]; # warn $results; my $vf = (split ':', $results)[3]; # warn $vf; my %r = ( filter => $filter, depth => $depth, ref => $ref, alt => $alt, vf => $vf, ); push @{ $h{$start_pos}{data} }, \%r; # $h{$start_pos}{depth} ||= $depth; # $h{$start_pos}{ref} ||= $ref; # $h{$start_pos}{alt} ||= $alt; # $h{$start_pos}{vf} ||= $vf; $i{$start_pos}++; # only want 1 or 0 flag } # auto-increment %h key if start position entry appears at least once in file: $h{$_}{count}++ for keys %i; } sub by_start_pos { return $a <=> $b; } sub by_frequency { return $h{$b}{count} <=> $h{$a}{count} || $a <=> $b; } sub make_chart { my $c = new XYChart(650, 420); $c->setPlotArea(75, 65, 550, 300, -1, -1, 0xc0c0c0, 0xc0c0c0, -1); $c->addLegend(50, 30, 0, "timesbi.ttf", 12)->setBackground($perlchartdir::Transparent); $c->addTitle("Genetically Modified Predator", "timesbi.ttf", 18); $c->yAxis()->setTitle("VF", "arialbi.ttf", 12); $c->xAxis()->setTitle("Depth", "arialbi.ttf", 12); $c->xAxis()->setWidth(3); $c->yAxis()->setWidth(3); $c->addScatterLayer($chart{x}, $chart{y}, "Unique entries", $perlchartdir::DiamondSymbol, 8, 0xff9933); $c->makeChart("scatter.png"); }