changeset 0:c68401961b4b draft

Uploaded
author elixir-it
date Thu, 23 Jul 2020 12:54:02 +0000
parents
children f8fb4135630a
files join_nucmer/join_nucmer.pl join_nucmer/join_nucmer.xml
diffstat 2 files changed, 89 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/join_nucmer/join_nucmer.pl	Thu Jul 23 12:54:02 2020 +0000
@@ -0,0 +1,55 @@
+# Merge per-genome SNP tables into one presence/absence matrix.
+# Usage: join_nucmer.pl OUTPUT INPUT [INPUT ...]
+# Each input line is split on whitespace; fields 2-4 are taken as position,
+# reference base and alternative base, the last field as the genome name.
+# OUTPUT gets a header of genome names, then one row per variant, keyed
+# "<start>_<ref>|<alt>", with 1 (present) or 0 (absent) per genome.
+use strict;
+use warnings;
+my $ofile = shift;
+die "Usage: $0 OUTPUT INPUT [INPUT ...]\n" unless defined $ofile;
+open(my $out, '>', $ofile) or die "Cannot write '$ofile': $!\n";
+my (@genomes, %dat_final);
+
+# Key of a run of adjacent SNPs that ends at $end: "<start>_<ref>|<alt>".
+sub run_key {
+    my ($end, $ref, $alt) = @_;
+    return ($end - length($alt) + 1) . "_$ref|$alt";
+}
+
+foreach my $f (@ARGV) {
+    open(my $in, '<', $f) or die "Cannot read '$f': $!\n";
+    my (%ldata, $genome);
+    while (my $line = <$in>) {
+        chomp $line;
+        my ($pos, $b1, $b2, $gen) = (split /\s+/, $line)[1, 2, 3, -1];
+        # Skip short or header lines and rows without a base on both sides.
+        next unless defined $b2 && $pos =~ /^\d+$/;
+        next unless $b1 =~ /[ACTG]/ && $b2 =~ /[ACTG]/;
+        $ldata{$pos} = [$b1, $b2];
+        next if defined $genome;
+        $genome = $gen;    # the first accepted row names this file's genome
+        push @genomes, $genome;
+    }
+    close($in);
+    # Join SNPs at consecutive positions into a single multi-base variant.
+    my ($prev_pos, $ref, $alt);
+    foreach my $pos (sort { $a <=> $b } keys %ldata) {
+        # No run is open until $ref is defined, so a SNP at position 1 starts one.
+        if (defined $ref && $pos - $prev_pos <= 1) {
+            $ref .= $ldata{$pos}[0];
+            $alt .= $ldata{$pos}[1];
+        } else {
+            $dat_final{ run_key($prev_pos, $ref, $alt) }{$genome} = 1 if defined $ref;
+            ($ref, $alt) = @{ $ldata{$pos} };
+        }
+        $prev_pos = $pos;
+    }
+    $dat_final{ run_key($prev_pos, $ref, $alt) }{$genome} = 1 if defined $ref;
+}
+print {$out} " @genomes\n";
+# Rows sorted by start position (numeric prefix of the key), ties by key text.
+foreach my $key (sort { no warnings 'numeric'; $a <=> $b or $a cmp $b } keys %dat_final) {
+    print {$out} join(' ', $key, map { $dat_final{$key}{$_} ? 1 : 0 } @genomes), "\n";
+}
+close($out) or die "Cannot close '$ofile': $!\n";
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/join_nucmer/join_nucmer.xml	Thu Jul 23 12:54:02 2020 +0000
@@ -0,0 +1,34 @@
+<tool id="join_nucmer" name="join_nucmer" version="">
+  <description></description>
+  <command>
+ <![CDATA[
+	perl $__tool_directory__/join_nucmer.pl $output_file ${" ".join(map(str, $input_file))}
+  ]]>
+  </command>
+
+  <inputs>
+    <param name="input_file" format="tsv" type="data" multiple="true" label="show-snps tabular output" help="join multiple nucmer output files" />
+  </inputs>
+
+  <outputs>
+    <data format="tsv" name="output_file" label="${tool.name} on ${on_string} consolidated variants file" />
+  </outputs>
+  <stdio>
+    <exit_code range="1:" level="fatal" />
+  </stdio>
+  <help>
+	**What it does**
+
+	This tool merges variant calls, in nucmer format, into a single tabular file. The output file will have as many columns as
+	the number of genomes provided as input, and as many rows as the number of variants observed in those genomes. For every genome assembly and
+	variant, a simple binary code (1 = present, 0 = absent) is used to indicate whether that genome carries a specific variant.
+	This table should be provided to the FunAnn tool to obtain the functional annotation of the variants.
+	
+  </help>
+
+  <tests>
+    <test>
+    </test>
+  </tests>
+
+</tool>