Mercurial > repos > elixir-it > corgat_join_nucmer
changeset 0:c68401961b4b draft
Uploaded
author | elixir-it |
---|---|
date | Thu, 23 Jul 2020 12:54:02 +0000 |
parents | |
children | f8fb4135630a |
files | join_nucmer/join_nucmer.pl join_nucmer/join_nucmer.xml |
diffstat | 2 files changed, 89 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# join_nucmer/join_nucmer.pl
#
# Merge several per-genome nucmer "show-snps" tables into a single
# presence/absence matrix.
#
# Usage:
#     perl join_nucmer.pl OUTPUT_FILE SNPS_FILE [SNPS_FILE ...]
#
# Output format (space separated):
#     - header line: a leading space followed by the genome names, in the
#       order their files were given;
#     - one line per variant: "<start>_<ref>|<alt>" followed by 1 or 0 for
#       every genome, telling whether that genome carries the variant.
#
# Substitutions at consecutive positions within one genome are merged into a
# single multi-base variant whose <start> is the position of its first base.

use strict;
use warnings;

# Run as a script, but stay loadable (e.g. via require) without side effects.
main(@ARGV) unless caller;

# main($output_path, @snps_paths)
# Reads every SNP table, collects the merged variants per genome and writes
# the presence/absence matrix to $output_path. Dies on any I/O failure so the
# caller (Galaxy treats exit codes >= 1 as fatal) notices the problem.
sub main {
    my ($ofile, @snp_files) = @_;

    die "Usage: $0 OUTPUT_FILE SNPS_FILE [SNPS_FILE ...]\n"
        unless defined $ofile;

    open my $out, '>', $ofile
        or die "Cannot open '$ofile' for writing: $!\n";

    my @genomes;        # genome names, in input order
    my %carriers_of;    # variant key => { genome name => 1 }

    for my $file (@snp_files) {
        my ($genome, $alleles_at) = read_snps($file);

        # A file without any usable SNP line contributes no column.
        next unless defined $genome;

        push @genomes, $genome;
        $carriers_of{$_}{$genome} = 1 for merge_adjacent_snps($alleles_at);
    }

    # Order rows by the numeric start position encoded at the front of the
    # key; break ties on the full key so the output is deterministic.
    my %start_of = map { ( $_ => ( /\A(-?\d+)/ ? $1 : 0 ) ) } keys %carriers_of;
    my @ordered  = sort { $start_of{$a} <=> $start_of{$b} or $a cmp $b }
                   keys %carriers_of;

    print {$out} ' ', join( ' ', @genomes ), "\n";
    for my $variant (@ordered) {
        my @flags = map { $carriers_of{$variant}{$_} ? 1 : 0 } @genomes;
        print {$out} join( ' ', $variant, @flags ), "\n";
    }

    # Buffered write errors only surface at close time.
    close $out or die "Cannot close '$ofile': $!\n";
    return;
}

# read_snps($path)
# Parses one SNP table. Fields are taken from the whitespace split of each
# line: index 1 = position, 2 = reference base, 3 = alternative base, last
# field = genome name (this matches lines that start with whitespace, so
# that index 0 is empty -- NOTE(review): confirm against the show-snps
# options used upstream).
# Lines whose bases are not A/C/G/T (headers, indels) are skipped.
# Returns ($genome_name, \%alleles_at) where %alleles_at maps a position to
# [ref, alt]; $genome_name is undef when the file has no usable line.
sub read_snps {
    my ($path) = @_;

    open my $in, '<', $path
        or die "Cannot open '$path' for reading: $!\n";

    my %alleles_at;
    my $genome;

    while ( my $line = <$in> ) {
        chomp $line;
        my ( $pos, $ref, $alt, $name ) = ( split /\s+/, $line )[ 1, 2, 3, -1 ];

        next unless defined $pos && defined $ref && defined $alt;
        next unless $pos =~ /\A\d+\z/;
        next unless $ref =~ /[ACTG]/ && $alt =~ /[ACTG]/;

        $alleles_at{$pos} = [ $ref, $alt ];

        # The genome name is taken from the first usable line of the file.
        $genome = $name unless defined $genome;
    }

    close $in or die "Cannot close '$path': $!\n";
    return ( $genome, \%alleles_at );
}

# merge_adjacent_snps(\%alleles_at)
# Walks the positions in ascending order and concatenates substitutions at
# consecutive positions into one variant.
# Returns the list of variant keys "<start>_<ref>|<alt>".
sub merge_adjacent_snps {
    my ($alleles_at) = @_;

    my @variants;
    my ( $prev_pos, $ref, $alt );    # undef until the first SNP is seen

    for my $pos ( sort { $a <=> $b } keys %{$alleles_at} ) {
        my ( $ref_base, $alt_base ) = @{ $alleles_at->{$pos} };

        if ( defined $prev_pos && $pos - $prev_pos <= 1 ) {
            # Directly follows the previous SNP: extend the current run.
            $ref .= $ref_base;
            $alt .= $alt_base;
        }
        else {
            # Gap (or very first SNP): flush the finished run, start anew.
            push @variants, _variant_key( $prev_pos, $ref, $alt )
                if defined $prev_pos;
            ( $ref, $alt ) = ( $ref_base, $alt_base );
        }
        $prev_pos = $pos;
    }

    push @variants, _variant_key( $prev_pos, $ref, $alt )
        if defined $prev_pos;

    return @variants;
}

# _variant_key($last_pos, $ref, $alt)
# Builds the row label for a run ending at $last_pos; the start position is
# recovered from the length of the accumulated alternative allele.
sub _variant_key {
    my ( $last_pos, $ref, $alt ) = @_;
    my $start = $last_pos - length($alt) + 1;
    return "${start}_${ref}|${alt}";
}
<!--
    join_nucmer/join_nucmer.xml

    Galaxy tool wrapper around join_nucmer.pl: takes several nucmer
    show-snps tables and produces one consolidated presence/absence table.
-->
<tool id="join_nucmer" name="join_nucmer" version="">
    <!-- NOTE(review): the version attribute is empty; consider setting an explicit tool version. -->
    <description>merge nucmer variant calls into a single presence/absence table</description>
    <command>
        <![CDATA[
            ## Paths are single-quoted so that unusual characters cannot break the shell command.
            perl '$__tool_directory__/join_nucmer.pl' '$output_file'
            #for $snps_file in $input_file
                '$snps_file'
            #end for
        ]]>
    </command>

    <inputs>
        <param name="input_file" format="tsv" type="data" multiple="true" label="show-snps tabular output" help="join multiple nucmer output files" />
    </inputs>

    <outputs>
        <data format="tsv" name="output_file" label="${tool.name} on ${on_string} consolidated variants file" />
    </outputs>

    <!-- Any non-zero exit code of the script is reported as a fatal error. -->
    <stdio>
        <exit_code range="1:" level="fatal" />
    </stdio>

    <help>
**What it does?**

This tool is used to merge variant calls, in nucmer format, into a single tabular file. The output file will have as many columns as
the number of genomes provided in input, and as many rows as the number of variants observed in the genomes. For every genome assembly and
variant a simple binary code (1 = present, 0 = absent) will be used to indicate whether that genome carries a specific variant.
This table should be provided to the FunAnn tool to obtain the functional annotation of the variants.
    </help>

    <!-- TODO: no functional test is defined yet; add input/output fixtures. -->
    <tests>
        <test>
        </test>
    </tests>

</tool>