changeset 0:482e911975a1 draft default tip

Uploaded
author elixir-it
date Thu, 08 Nov 2018 12:54:29 -0500
parents
children
files 14_june_18_intersect.indels.with.infos.pl intersect_indels.xml
diffstat 2 files changed, 151 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/14_june_18_intersect.indels.with.infos.pl	Thu Nov 08 12:54:29 2018 -0500
@@ -0,0 +1,110 @@
+#!/usr/bin/perl -w
+#
+use strict;
+
+my $f1=shift;	#vcf di varscan
+my $f2=shift;   #vcf di freebayes
+my $f3=shift;   #vcf di GATK
+my $fout=shift;
+
+die("non trovo il file $f1") unless -e $f1;
+die("non trovo il file $f2") unless -e $f2;
+die("non trovo il file $f2") unless -e $f3;
+
+die ("non specificato il file di output") unless $fout;
+
+my %ALL=();
+my %L=();
+my %H=();
+
+open(OUT,">$fout.common.indels.vcf");
+open(UN,">$fout.unique.indels.vcf");
+
+open(IN,$f1);
+while(<IN>)
+{
+	next if $_=~/^\#/;
+	my ($chr,$st,$b1,$b2)=(split())[0,1,3,4];
+	my $sum=length($b1)+length($b2);
+	next if $sum<=2;
+	next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) );
+	$H{$chr}{$st}++;
+	$ALL{$chr}{$st}=$_;
+	push(@{$L{$chr}{$st}},"VS");
+}
+close(IN);
+
+open(IN,$f2);
+while(<IN>)
+{
+	next if $_=~/^\#/;
+        my ($chr,$st,$b1,$b2)=(split())[0,1,3,4];
+	my $sum=length($b1)+length($b2);
+        next if $sum<=2;
+        next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) );
+       	$H{$chr}{$st}++;
+	$ALL{$chr}{$st}=$_;
+	push(@{$L{$chr}{$st}},"FB");
+}
+close(IN);
+
+open(IN,$f3);
+while(<IN>)
+{
+        if ($_=~/^\#/)
+	{
+		print OUT if $_=~/fileformat/;
+                print OUT if $_=~/contig/;
+		print OUT if $_=~/CHROM/;
+                print UN if  $_=~/fileformat/;
+                print UN if  $_=~/contig/;
+		print UN if  $_=~/CHROM/;
+        	next;
+	}
+        my ($chr,$st,$b1,$b2)=(split())[0,1,3,4];
+        my $sum=length($b1)+length($b2);
+        next if $sum<=2;
+	next if (length($b1)==length($b2) && !($b1=~/\-/ || $b2=~/\-/ || $b1=~/\./ || $b2=~/\./) );
+        $H{$chr}{$st}++;
+        $ALL{$chr}{$st}=$_;
+	push(@{$L{$chr}{$st}},"GA");
+}
+close(IN);
+
+
+foreach my $chr (sort keys %H)
+{
+	foreach my $s (sort{$a<=>$b} keys %{$H{$chr}})
+	{
+			my $vl=$H{$chr}{$s};
+			unless ($vl)
+			{
+				warn("non so contare le occorrenze di $chr $s\n");
+				next;
+			}
+			my $out_vl=$ALL{$chr}{$s};
+			unless ($out_vl)
+			{
+				warn("non so trovare le info di $chr $s\n");
+				next;
+			}
+			my @outs=(split(/\t/,$out_vl));
+			my $comm=$outs[7];
+			if ($vl>=2)
+			{
+				my $mm=$L{$chr}{$s} ? join("_",@{$L{$chr}{$s}}) : "NA";
+				my $comm2=$comm;
+				$comm2.=";NM=$vl;LM=$mm";
+				$outs[7]=$comm2;
+				$out_vl=join("\t",@outs);
+				print OUT $out_vl;
+			}else{
+                                my $mm=$L{$chr}{$s} ? $L{$chr}{$s}[0] : "NA";
+                                my $comm2=$comm;
+                                $comm2.=";NM=$vl;LM=$mm";
+                                $outs[7]=$comm2;
+                                $out_vl=join("\t",@outs);
+                                print UN $out_vl;
+			}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/intersect_indels.xml	Thu Nov 08 12:54:29 2018 -0500
@@ -0,0 +1,41 @@
+<tool id="intersect_indels" name="intersect_indels" version="1">
+  <description>tool for indel VCF file intersection </description>
+  <requirements>
+  </requirements>
+  <command>
+	perl  $__tool_directory__/14_june_18_intersect.indels.with.infos.pl
+  $input1 $input2 $input3 intersect 2>log
+  </command>
+  <inputs>
+    <param name="input1" format="vcf" type="data" label="VCF File" help="Varscan2 indels output" />
+    <param name="input2" format="vcf" type="data" label="VCF File" help="Frebayes indels output" />
+    <param name="input3" format="vcf" type="data" label="VCF File" help="GATK HaplotypeCaller indels output" />
+  </inputs>
+  <outputs>
+    <data format="vcf" name="output1" from_work_dir="intersect.common.indels.vcf" label="${tool.name} on ${on_string} common indels" />
+    <data format="vcf" name="output2" from_work_dir="intersect.unique.indels.vcf" label="${tool.name} on ${on_string} unique indels" />
+    <data format="txt" name="output3" from_work_dir="log" label="${tool.name} on ${on_string} log" />
+  </outputs>
+  <stdio>
+  </stdio>
+  <help>
+**WHAT IT DOES**
+
+Predictions of indels from Varscan2, GATK and Freebayes are consolidated into a single call-set following a simple majority consensus rule. Variants  identified by at least two methods are incorporated into a final “high confidence” call set.Singleton variants predicted by only one method are considered less reliable and are included in a low quality call-set.
+
+**The final output consists in two VCF files:**
+
+-INDELs common to at least 2 methods
+
+-Singleton INDELs obtained from a single method
+
+  </help>
+  <citations>
+	<citation type="doi">10.1186/s12864-018-4508-1</citation>
+  </citations>
+  <tests>
+    <test>
+    </test>
+  </tests>
+
+</tool>