RSS Git Download  Clone
Raw Blame History
#!/usr/bin/perl

use warnings;
use strict;

use lib '/home/raj/perl5/lib/perl5';
use lib '/home/raj/perl-lib/ChartDirector';

use IO::All;
use FindBin; # warn $FindBin::Bin;
use Data::Dumper;
use perlchartdir;

my $ref = $ARGV[0];

unless ($ref) {
	print "need a vcf identifier !!\n\n"; exit;
}

my $src_dir = $FindBin::Bin . '/vcf';

my @dirs = io($src_dir)->all;

my %h; # start-position indexer
my %chart;

# extract files from each dir, skip all except matching supplied filename:
for my $d (@dirs) { # warn Dumper $_;
	my @files = io($d)->all; # Dumper \@files;  
	for my $f (@files) { # warn $f->filename;
		next unless $f->filename =~ /^$ref/; # warn $f->filename;
		parse($f->name);
	}
} # warn Dumper \%h; exit;

# printf "| %9s | %s |\n", $_, $h{$_}{count} for sort by_frequency keys %h;
# map { print Dumper $h{$_} } grep { $h{$_}{count} == 1 } sort by_start_pos keys %h;

my $format = "| %9s | %6s | %8s | %10s | %4s | %6s |\n";
my @dividr = ( '=' x 9, '=' x 6, '=' x 8, '=' x 10, '=' x 4, '=' x 6 );

printf $format, qw( START REF ALT FILTER DP VF );
printf $format, @dividr;

for my $location ( sort by_start_pos keys %h ) { # warn $location;
	next unless $h{$location}{count} == 1;
	my $data = $h{$location}{data}; # warn Dumper $data;

	for (@$data) {
		push @{ $chart{x} }, $_->{depth};
		push @{ $chart{y} }, $_->{vf};		
	}
	printf $format, $location, @{$_}{ qw/ref alt filter depth vf/ } for @$data;
	printf $format, @dividr;
}

make_chart();

sub parse {
	my $filename = shift;
	my @lines = io($filename)->slurp; # warn Dumper \@lines;
	
	my %i; # counter localised to individual file
	LINE: for (@lines) {
		next LINE if /^#/; # skip comments
		my @fields = split "\t", $_;
		
		my $start_pos  = $fields[1] || next LINE; # warn $start_pos;
		my ($ref,$alt) = @fields[3,4]; # warn Dumper [$ref,$alt];
		my $filter     = $fields[6]; # warn $filter;
		my ($depth)    = $fields[7] =~ /DP=(\d+)/; # warn $depth;
		my $results    = $fields[-1]; # warn $results;		
		my $vf = (split ':', $results)[3]; # warn $vf;
		
		my %r = (
			filter => $filter,
			depth  => $depth,
			ref    => $ref,
			alt    => $alt,
			vf     => $vf,
		);
			
		push @{ $h{$start_pos}{data} }, \%r;
		
#		$h{$start_pos}{depth}  ||= $depth;
#		$h{$start_pos}{ref}    ||= $ref;
#		$h{$start_pos}{alt}    ||= $alt;
#		$h{$start_pos}{vf}     ||= $vf;

		$i{$start_pos}++; # only want 1 or 0 flag
	}
	# auto-increment %h key if start position entry appears at least once in file:
	$h{$_}{count}++ for keys %i;
}

sub by_start_pos {
	return $a <=> $b;
}
sub by_frequency {
	return $h{$b}{count} <=> $h{$a}{count} || $a <=> $b;
}

sub make_chart {
	my $c = new XYChart(650, 420);

	$c->setPlotArea(75, 65, 550, 300, -1, -1, 0xc0c0c0, 0xc0c0c0, -1);

	$c->addLegend(50, 30, 0, "timesbi.ttf", 12)->setBackground($perlchartdir::Transparent);

	$c->addTitle("Genetically Modified Predator", "timesbi.ttf", 18);

	$c->yAxis()->setTitle("VF", "arialbi.ttf", 12);

	$c->xAxis()->setTitle("Depth", "arialbi.ttf", 12);

	$c->xAxis()->setWidth(3);
	$c->yAxis()->setWidth(3);

	$c->addScatterLayer($chart{x}, $chart{y}, "Unique entries",
		$perlchartdir::DiamondSymbol, 8, 0xff9933);

	$c->makeChart("scatter.png");
}