Mercurial > repos > elixir-it > covacs_intersect_indels
view 14_june_18_intersect.indels.with.infos.pl @ 0:482e911975a1 draft default tip
Uploaded
author | elixir-it |
---|---|
date | Thu, 08 Nov 2018 12:54:29 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w # use strict; my $f1=shift; #vcf di varscan my $f2=shift; #vcf di freebayes my $f3=shift; #vcf di GATK my $fout=shift; die("non trovo il file $f1") unless -e $f1; die("non trovo il file $f2") unless -e $f2; die("non trovo il file $f2") unless -e $f3; die ("non specificato il file di output") unless $fout; my %ALL=(); my %L=(); my %H=(); open(OUT,">$fout.common.indels.vcf"); open(UN,">$fout.unique.indels.vcf"); open(IN,$f1); while(<IN>) { next if $_=~/^\#/; my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; my $sum=length($b1)+length($b2); next if $sum<=2; next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) ); $H{$chr}{$st}++; $ALL{$chr}{$st}=$_; push(@{$L{$chr}{$st}},"VS"); } close(IN); open(IN,$f2); while(<IN>) { next if $_=~/^\#/; my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; my $sum=length($b1)+length($b2); next if $sum<=2; next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) ); $H{$chr}{$st}++; $ALL{$chr}{$st}=$_; push(@{$L{$chr}{$st}},"FB"); } close(IN); open(IN,$f3); while(<IN>) { if ($_=~/^\#/) { print OUT if $_=~/fileformat/; print OUT if $_=~/contig/; print OUT if $_=~/CHROM/; print UN if $_=~/fileformat/; print UN if $_=~/contig/; print UN if $_=~/CHROM/; next; } my ($chr,$st,$b1,$b2)=(split())[0,1,3,4]; my $sum=length($b1)+length($b2); next if $sum<=2; next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) ); $H{$chr}{$st}++; $ALL{$chr}{$st}=$_; push(@{$L{$chr}{$st}},"GA"); } close(IN); foreach my $chr (sort keys %H) { foreach my $s (sort{$a<=>$b} keys %{$H{$chr}}) { my $vl=$H{$chr}{$s}; unless ($vl) { warn("non so contare le occorrenze di $chr $s\n"); next; } my $out_vl=$ALL{$chr}{$s}; unless ($out_vl) { warn("non so trovare le info di $chr $s\n"); next; } my @outs=(split(/\t/,$out_vl)); my $comm=$outs[7]; if ($vl>=2) { my $mm=$L{$chr}{$s} ? join("_",@{$L{$chr}{$s}}) : "NA"; my $comm2=$comm; $comm2.=";NM=$vl;LM=$mm"; $outs[7]=$comm2; $out_vl=join("\t",@outs); print OUT $out_vl; }else{ my $mm=$L{$chr}{$s} ? $L{$chr}{$s}[0] : "NA"; my $comm2=$comm; $comm2.=";NM=$vl;LM=$mm"; $outs[7]=$comm2; $out_vl=join("\t",@outs); print UN $out_vl; } } }