Mercurial > repos > steffen > covenntree
changeset 4:f08d1b99069b draft
Deleted selected files
author | steffen |
---|---|
date | Fri, 12 Dec 2014 10:24:51 -0500 |
parents | 6d873d192a8b |
children | a9fff1670e9d |
files | coVennTree/coVennTree/.DS_Store coVennTree/coVennTree/._.DS_Store coVennTree/coVennTree/coVennTree.pl coVennTree/coVennTree/coVennTree.xml coVennTree/coVennTree/static/.DS_Store coVennTree/coVennTree/static/._.DS_Store coVennTree/coVennTree/static/images/._example1.png coVennTree/coVennTree/static/images/._venn-graph-off.png coVennTree/coVennTree/static/images/._venn-graph-on.png coVennTree/coVennTree/static/images/example1.png coVennTree/coVennTree/static/images/venn-graph-off.png coVennTree/coVennTree/static/images/venn-graph-on.png coVennTree/coVennTree/tool_dependencies.xml |
diffstat | 13 files changed, 0 insertions(+), 997 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl
# Source path in the repository: coVennTree/coVennTree/coVennTree.pl
use strict;
use File::Basename;
use List::MoreUtils qw( minmax );

# NOTE(review): `use warnings` is deliberately not enabled. The arithmetic
# below relies on missing fields and the "undef" placeholder numifying to 0,
# which would emit warnings on STDERR -- TODO confirm before enabling it.

# --------------------------------------------------------------------------------------------------
# author:  steffen lott
# mail:    steffen.lott@uni-freiburg.de
# date:    06-10-2014
# version: 1.6
#
# description:
#   Converts a MEGAN export (tab separated: taxon path, then one count column
#   per dataset, up to three datasets) into a network that can be visualized
#   with Cytoscape. Two files are written:
#     * the network (.sif):  "<parent> pp <child>" per edge
#     * the attributes:      "<id>\t<chart url>\t<label>\t<values>" per node
#
#   Internally every node carries a "value string" of seven space separated
#   numbers: "d1 d2 d3 o12 o13 o23 0" (three dataset counts, the three
#   pairwise overlaps and a constant 0 for the triple overlap).
# --------------------------------------------------------------------------------------------------

# print version and usage when called without arguments
if (@ARGV == 0) {
    print "CoVennTree-Version 1.6\n";
    print "COMMAND\n";
    print "coventree argv0 argv1 argv2 argv3 argv4 argv5\n";
    print "--------------\n";
    print "argv0 = input file\n";
    print "argv1 = color mode [0,4]\n";
    print "argv2 = transformation function [0,6]\n";
    print "argv3 = only leaf information => 0 ; all information => 1\n";
    print "argv4 = output file name network\n";
    print "argv5 = output file name attributes\n";
    exit;
}

# value returned by getId() when the requested path element does not exist
my $NO_ID = -1;

# color triples (set1, set2, set3) per color mode 0..4
my @COLOR_SCHEMES = (
    [ "18A3F2", "FA0800", "FFF905" ],
    [ "FF2A00", "9CFF00", "00CCFF" ],
    [ "B4FF00", "FF00C6", "00AEFF" ],
    [ "82FF00", "7E00FF", "FF003B" ],
    [ "1A1A1A", "8A8A8A", "C7C7C7" ],
);

# root exponents used by lookupPixelSQRT() per transformation function 0..6
# (intended for 3,000 / 30,000 / ... / 3,000,000,000 data points in sum)
my @ROOT_EXPONENTS = ( 1.6, 2.1, 2.6, 3.1, 3.7, 4, 4.7 );

# container to represent the network (lines of the .sif file)
my @network = ();

# 0 PARAMETER: input file (taxon path, count(s), tab separated)
my $megan_file = $ARGV[0];

# 1 PARAMETER: color mode for the venn diagrams, 0..4
my $colorMode = defined $ARGV[1] ? $ARGV[1] : 3;
die "Error: color mode must be in the range 0..4!\n"
    unless grep { $colorMode == $_ } 0 .. $#COLOR_SCHEMES;

# 2 PARAMETER: transformation function, 0..6
my $transFnc = defined $ARGV[2] ? $ARGV[2] : 1;
die "Error: transformation function must be in the range 0..6!\n"
    unless grep { $transFnc == $_ } 0 .. $#ROOT_EXPONENTS;

# 3 PARAMETER: 1 => use all information ("off"): counts assigned to inner
# nodes are kept in artificial "not_assigned_*" nodes. Any other value =>
# only leaf information ("on"). Without the argument all information is used.
my $onlyLeafs = "off";
if (defined $ARGV[3]) {
    $onlyLeafs = ($ARGV[3] == 1) ? "off" : "on";
}

# 4 PARAMETER: output -> network
my $out_network = $ARGV[4];

# 5 PARAMETER: output -> attributes
my $out_attributes = $ARGV[5];

# fail before doing any work; three-argument open() with an undefined file
# name would otherwise open an anonymous temporary file
die "Error: output file names for network (argv4) and attributes (argv5) are required!\n"
    unless defined($out_network) && defined($out_attributes);

# read-in MEGAN-file
# A header line "#Datasets set1 set2 ..." is required before the first data
# line. Files with one or two datasets are filled up with zero columns so that
# every data line carries exactly three counts.
my @input_file   = ();
my @numberOfSets = ();    # header fields: "#Datasets" plus one entry per dataset

open(my $in_fh, '<', $megan_file) or die "File not found - \"Path-File\"!\n";
while (my $line = <$in_fh>) {
    $line =~ s/[\r\n]+\z//;           # also strips the "\r" of CRLF files
    next if $line =~ /^\s*$/;         # ignore blank lines

    if ($line =~ /^#/) {
        @numberOfSets = split("\t", $line);
        next;
    }

    my $columns = @numberOfSets;
    if ($columns < 2 || $columns > 4) {
        print STDERR "Error: File doesn't contain any dataset or contain more than three!\n";
        exit 1;
    }
    $line .= "\t0" x (4 - $columns);  # pad missing datasets with zeros

    addToNetwork($line);
    push(@input_file, $line);
}
close($in_fh);

die "Error: File doesn't contain any data lines!\n" unless @input_file;

# --------------------------------------------------------------------------------------------------
# (1) PREPROCESSING: detect all leaf nodes
my $modifiedInput = detectNonLeafs();

# (2) MAIN COMPUTATION: aggregate depth by depth (e.g. "root;Viruses;" => depth 2)
my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp();

# (3) VENN-END-PREPARATION: build one attribute line per node
my $vennToStore = vennForCytoscape($vennClusterOut, $specialNumberOut);

# (4) SAVE RESULTS INTO FILES: network (.sif) and attributes
storeNetwork();
store2FileVenn($vennToStore);
# --------------------------------------------------------------------------------------------------


# Write the attribute lines (array ref in $_[0]) to $out_attributes.
sub store2FileVenn {
    my ($outVenn) = @_;

    open(my $fh, '>', $out_attributes) or die "File can't be written - \"venn - File\"!\n";
    print {$fh} join("\n", @{$outVenn}) . "\n";
    close($fh) or die "File can't be written - \"venn - File\"!\n";
}


# Build the attribute lines for Cytoscape.
#   $_[0]  array ref  [depth]{parent}{child} => value string
#   $_[1]  array ref  [depth]{parent}{child} => congruousness value (inner nodes only)
# Returns an array ref of "<id>\t<url>\t<label>\t<values>" lines. Keys are
# visited in sorted order so that the output is reproducible between runs.
sub vennForCytoscape {
    my ($vennCluster, $specialNum) = @_;
    my @out = ();

    for my $i (0 .. $#{$vennCluster}) {
        next unless defined $vennCluster->[$i];
        for my $key (sort keys %{ $vennCluster->[$i] }) {
            for my $key2 (sort keys %{ $vennCluster->[$i]{$key} }) {
                my $values    = $vennCluster->[$i]{$key}{$key2};
                my $frameSize = getCorrectedFrameSize($values);

                # inner nodes get their congruousness value appended to the label
                my $special = $specialNum->[$i]{$key}{$key2};
                my $label   = defined $special
                            ? $key2 . "[" . sprintf("%.3f", $special) . "]"
                            : $key2;

                my $googleURL = computeGoogleApiStrRotation($frameSize, $values, $colorMode);
                push(@out, $key2 . "\t" . $googleURL . "\t" . $label . "\t" . $values);
            }
        }
    }
    return \@out;
}


# Build the chart URL for one node. The sets are emitted in descending size
# order ("rotation"), colors and overlaps are permuted accordingly.
#   $_[0]  frame size in pixel
#   $_[1]  value string
#   $_[2]  color mode (index into @COLOR_SCHEMES)
# NOTE(review): the Google Image Charts endpoint used here may no longer be
# served -- verify before relying on the generated URLs.
sub computeGoogleApiStrRotation {
    my ($frameSize, $values, $colMode) = @_;

    my @spVal = split(" ", $values);
    my $sum   = $spVal[0] + $spVal[1] + $spVal[2];
    my @col   = @{ $COLOR_SCHEMES[$colMode] || [] };

    # percentages relative to the sum of the three set sizes
    my @relVal = map { $sum != 0 ? $_ * 100 / $sum : 0 } @spVal;

    # overlap lookup, independent of the order of the two set indices
    my %ovHash = (
        "01" => $relVal[3], "10" => $relVal[3],
        "02" => $relVal[4], "20" => $relVal[4],
        "21" => $relVal[5], "12" => $relVal[5],
    );

    # set indices, largest set first; ties are resolved by index so that the
    # result does not depend on hash ordering
    my @store = sort { $spVal[$b] <=> $spVal[$a] || $a <=> $b } 0 .. 2;

    my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco=";
    $url .= join(",", map { $col[$_] } @store);
    $url .= "&cht=v&chd=t:";

    # node values in the rotated order
    for my $i (0 .. @relVal - 5) {
        $url .= sprintf("%.1f", $relVal[ $store[$i] ]) . ",";
    }

    # intersection values in the rotated order, then the (unused) triple overlap
    my @pairs = ($store[0] . $store[1], $store[0] . $store[2], $store[1] . $store[2]);
    $url .= join(",", map { sprintf("%.1f", $ovHash{$_}) } @pairs) . ",";
    $url .= "0.0";
    $url .= "&chf=bg,s,e0dede00";

    return $url;
}


# Original URL builder without node rotation; the order of the sets is always
# the same. Currently not called.
sub computeGoogleApiStr {
    my ($frameSize, $values) = @_;

    my @spVal  = split(" ", $values);
    my $sum    = $spVal[0] + $spVal[1] + $spVal[2];
    my @relVal = map { $sum != 0 ? $_ * 100 / $sum : 0 } @spVal;

    my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize
            . "&chco=FF6342,ADDE63,63C6DE"
            . "&cht=v&chd=t:";
    $url .= join(",", map { sprintf("%.1f", $_) } @relVal);
    $url .= "&chf=bg,s,e0dede00";

    return $url;
}


# Frame size for a node: the size derived from the sum of the three sets plus
# an extra frame for the part of the two smaller sets that does not overlap
# with the largest one.
sub getCorrectedFrameSize {
    my ($values) = @_;

    my @spVal = split(" ", $values);
    my $frame = lookupPixelSQRT($spVal[0] + $spVal[1] + $spVal[2]);

    my $arrPos = getMaxPos($spVal[0], $spVal[1], $spVal[2]);
    my ($addNode, $addOver);
    if ($arrPos == 0) {
        $addNode = $spVal[1] + $spVal[2];
        $addOver = $spVal[3] + $spVal[4];
    }
    elsif ($arrPos == 1) {
        $addNode = $spVal[0] + $spVal[2];
        $addOver = $spVal[3] + $spVal[5];
    }
    else {
        $addNode = $spVal[0] + $spVal[1];
        $addOver = $spVal[4] + $spVal[5];
    }

    # a negative base would turn the fractional power into NaN
    my $addSum = $addNode - $addOver;
    $addSum = 0 if $addSum < 0;

    return $frame + lookupPixelSQRT($addSum);
}


# Position (0..2) of the largest of three values; the first one wins on ties.
sub getMaxPos {
    return 0 if ($_[0] >= $_[1]) && ($_[0] >= $_[2]);
    return 1 if ($_[1] >= $_[0]) && ($_[1] >= $_[2]);
    return 2;
}


# Frame size from the static lookup table. Currently not called.
sub getFrameSize {
    my ($values) = @_;
    my @spVal = split(" ", $values);
    return lookupPixel($spVal[0] + $spVal[1] + $spVal[2]);
}


# Aggregate the node values from the deepest level up to the root.
# Reads $modifiedInput (lines "<path>\t<value string>").
# Returns two array refs:
#   container         [depth]{parent}{child} => value string (inner nodes hold
#                     the element-wise sum of their children)
#   containerSpecial  [depth]{parent}{child} => congruousness of the children
# NOTE(review): aggregated values are keyed by node name, not by full path, so
# this presumably requires node names to be unique in the tree -- confirm.
sub clusterVennBottomUp {
    my @container        = ();
    my @containerSpecial = ();
    my %helperHash       = ();    # node name => sum of the values of its children
    my %specialMatrixAll = ();    # node name => list of the values of its children

    foreach my $entry (@{$modifiedInput}) {
        my @tmpArr = split('\t', $entry);
        my @path   = split(';', $tmpArr[0]);
        my $deep   = @path - 1;
        my $parent = ($deep >= 1) ? $path[-2] : "no";    # "no" marks the root level
        $container[$deep]{$parent}{ $path[-1] } = $tmpArr[1];
    }

    # start computation from the deepest path to the root node
    for (my $i = $#container; $i >= 0; $i--) {
        next unless defined $container[$i];
        for my $key (sort keys %{ $container[$i] }) {
            my $children = $container[$i]{$key};

            # inner nodes take over the sum computed on the level below
            for my $child (sort keys %{$children}) {
                next unless exists $helperHash{$child};
                $children->{$child} = $helperHash{$child};
                $containerSpecial[$i]{$key}{$child} = vennCongruousness($specialMatrixAll{$child});
            }

            # sum up all nodes that share this parent
            for my $child (sort keys %{$children}) {
                my $value = $children->{$child};
                $helperHash{$key} = exists $helperHash{$key}
                                  ? addValues($helperHash{$key}, $value)
                                  : $value;
                push(@{ $specialMatrixAll{$key} }, $value);
            }
        }
    }
    return (\@container, \@containerSpecial);
}


# Condense the value strings of the children of one node into a single number
# in [0..1].
#   $_[0]  array ref of value strings (one per child)
# Returns 0 when there is nothing to compare (no children or only zero
# counts), -1 for an unsupported number of datasets.
sub vennCongruousness {
    my ($inSpecMatrix) = @_;
    my $numOfSets = @numberOfSets - 1;

    my @rows    = map { [ split(" ", $_) ] } @{$inSpecMatrix};
    my $numVenn = @rows;
    return 0 if $numVenn == 0;

    # (step 1) - column totals, and which sets / overlaps occur at all
    my @total  = (0) x 6;
    my @active = (0) x 6;
    for my $row (@rows) {
        for my $col (0 .. 5) {
            $total[$col] += $row->[$col];
            $active[$col] = 1 if $row->[$col] > 0;
        }
    }
    my $activeSets = $active[0] + $active[1] + $active[2];
    my $activeOvps = $active[3] + $active[4] + $active[5];

    # (step 2 + 3) - per column: sum of total / value over all children
    # (zero values contribute nothing)
    my @score = (0) x 6;
    for my $row (@rows) {
        for my $col (0 .. 5) {
            next if $row->[$col] == 0;
            $score[$col] += $total[$col] / $row->[$col];
        }
    }

    # (step 4 + 5) - average over the children, then normalize to [0..1] as
    # min(avg, #children) / max(avg, #children)
    for my $col (0 .. 5) {
        my $avg = $score[$col] / $numVenn;
        $score[$col] = ($avg < $numVenn) ? $avg / $numVenn : $numVenn / $avg;
    }

    # (step 6) - combine all columns into one value
    return $score[0] if $numOfSets == 1;
    return -1 if $numOfSets != 2 && $numOfSets != 3;

    # all children are empty: avoid the division by zero below
    return 0 if $activeSets == 0;

    if ($numOfSets == 2) {
        my $setPart = ($score[0] + $score[1]) / $activeSets;
        return $setPart if $activeOvps == 0;
        return ($setPart + $score[3]) / 2;
    }

    my $setPart = ($score[0] + $score[1] + $score[2]) / $activeSets;
    return $setPart if $activeOvps == 0;
    my $ovpPart = ($score[3] + $score[4] + $score[5]) / $activeOvps;
    return ($setPart + $ovpPart) / 2;
}


# Element-wise sum of two value strings; the number of fields of the first
# one determines the length of the result.
sub addValues {
    my ($val1, $val2) = @_;

    my @sV1 = split(" ", $val1);
    my @sV2 = split(" ", $val2);

    my @sums = ($sV1[0] + $sV2[0]);
    for my $i (1 .. $#sV1) {
        push(@sums, $sV1[$i] + $sV2[$i]);
    }
    return join(" ", @sums);
}


# Walk @input_file from the last line to the first and decide for every line
# whether it is a leaf or an inner node. A line is an inner node when the line
# below it names it as parent.
#   leaf        => "<path>\t<value string>"
#   inner node  => "<path>\tundef"; if all information is used and the node has
#                  counts of its own, an artificial leaf "not_assigned_<node>"
#                  is added to the result and to @network
# Returns an array ref with the converted lines.
sub detectNonLeafs {
    my @modifiedFile = ();

    # the last line has no successor and is therefore always a leaf
    my @tmpArr1 = split('\t', $input_file[$#input_file], 2);
    my $parent1 = getId($tmpArr1[0], -2);
    push(@modifiedFile, convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]));

    # only the root node exists
    return \@modifiedFile if $parent1 eq $NO_ID;

    for (my $i = @input_file - 2; $i >= 0; $i--) {
        @tmpArr1 = split('\t', $input_file[$i], 2);
        my $parent2 = getId($tmpArr1[0], -2);
        my $child2  = getId($tmpArr1[0], -1);

        if (($parent2 ne $parent1) && ($parent1 eq $child2)) {
            # inner node: its value is computed later from the children
            push(@modifiedFile, convertPath($tmpArr1[0]) . "\t" . "undef");

            my $tSum = 0;
            $tSum += $_ for split('\t', $tmpArr1[1]);

            if (($onlyLeafs eq "off") && ($tSum > 0)) {
                push(@modifiedFile,
                     convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t"
                     . computeLeafValues($tmpArr1[1]));
                push(@network, $child2 . " pp " . "not_assigned_" . $child2);
            }
        }
        else {
            # sibling of the line below, or the end of another branch
            push(@modifiedFile, convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]));
        }

        # the line below was a root level entry: stop here
        if ($parent1 eq $NO_ID) {
            push(@modifiedFile, convertPath($tmpArr1[0]));
            last;
        }
        $parent1 = $parent2;
    }

    return \@modifiedFile;
}


# Index of the last element of a ';' separated path. Currently not called.
sub getPathDeep {
    my ($inPath) = @_;
    my @deep = split(';', $inPath);
    return $#deep;
}


# Remove double quotes and replace whitespace runs by '_'.
sub convertPath {
    my ($inString) = @_;
    $inString =~ s/"//g;
    $inString =~ s/\s+/_/g;
    return $inString;
}


# Element of a ';' separated path, addressed from the end
# (-1 => node itself, -2 => its parent). Returns -1 if there is no such element.
sub getId {
    my ($lineToParse, $idPos) = @_;

    my @path = split(';', convertPath($lineToParse));
    return $NO_ID if (@path + $idPos) < 0;
    return $path[$idPos];
}


# Value string of a leaf from its tab separated counts "d1\td2\td3":
# the three counts, the pairwise minima (d1/d2, d1/d3, d2/d3) as overlaps and
# a constant 0.
sub computeLeafValues {
    my ($meganValues) = @_;
    my @rawValues = split('\t', $meganValues);

    my @fields = @rawValues[0 .. 2];
    for my $pair ([0, 1], [0, 2], [1, 2]) {
        my ($first, $second) = @rawValues[ @{$pair} ];
        push(@fields, ($first <= $second) ? $first : $second);
    }
    push(@fields, 0);

    return join(" ", @fields);
}


# Add the edge "<parent> pp <child>" for one input line to @network.
# Root level entries have no parent and add nothing.
sub addToNetwork {
    my ($inLine) = @_;

    my @splitInLine = split('\t', $inLine);
    my @elements    = split(';', convertPath($splitInLine[0]));

    if (@elements > 1) {
        push(@network, $elements[-2] . " pp " . $elements[-1]);
    }
}


# Write @network to $out_network (.sif).
sub storeNetwork {
    open(my $fh, '>', $out_network) or die "File can't be written - \"sif - File\"!\n";
    print {$fh} join("\n", @network) . "\n";
    close($fh) or die "File can't be written - \"sif - File\"!\n";
}


# --------------------------------------------------------------------------------------------------
# two different lookup-tables are available!
# lookupPixel() => static ; lookupPixelSQRT() => dynamic
# --------------------------------------------------------------------------------------------------

# Static lookup: absolute node size => frame size in pixel. Currently not called.
sub lookupPixel {
    my ($query) = @_;

    return  30 if $query < 10;
    return  40 if $query < 100;
    return  50 if $query < 1000;
    return  60 if $query < 10000;
    return  80 if $query < 100000;
    return 100 if $query < 1000000;
    return 140 if $query < 10000000;
    return 180 if $query < 20000000;
    return 220 if $query < 30000000;
    return 250;
}


# Dynamic lookup (this one is used): absolute node size => frame size in
# pixel, using the root selected by $transFnc.
sub lookupPixelSQRT {
    my ($value) = @_;
    return int(($value ** (1 / $ROOT_EXPONENTS[$transFnc])) * 1.8 + 8);
}
<!-- Source path in the repository: coVennTree/coVennTree/coVennTree.xml -->
<!-- Galaxy tool wrapper for coVennTree.pl -->
<tool id="coVennTree" name="CoVennTree (Comparative weighted Venn Tree) - Rooted Tree" version="1.6.0">
    <description>Comparative rooted tree analysis for files in dsv format</description>
    <requirements>
        <requirement type="package" version="1.6">coVennTree</requirement>
        <requirement type="package" version="5.18.1">perl</requirement>
    </requirements>
    <!-- positional arguments, in the order coVennTree.pl reads them from @ARGV;
         file paths are quoted so that they reach the script as one argument each -->
    <command interpreter="perl">
        coVennTree.pl
        '$infile'
        $color_mode
        $trans_func
        $leafs_allInformation
        '$outfile_network'
        '$outfile_attribute'
    </command>

    <inputs>
        <param name="infile" type="data" format="tabular" label="Path File" help="Tabular file containing the paths and values"/>

        <param name="color_mode" multiple="false" type="select" label="Select color mode for Venn diagrams">
            <option value="0">(1) Set1: blue Set2: red Set3: yellow</option>
            <option value="1">(2) Set1: red Set2: green Set3: blue</option>
            <option value="2">(3) Set1: green Set2: magenta Set3: blue</option>
            <option value="3">(4) Set1: green Set2: purple Set3: red</option>
            <option value="4">(5) Set1: dark gray Set2: mid-grey Set3: light gray</option>
        </param>

        <param name="trans_func" multiple="false" type="select" label="Select transformation function">
            <option value="0">(1) datasets max: 3,000 data points in sum</option>
            <option value="1">(2) datasets max: 30,000 data points in sum</option>
            <option value="2">(3) datasets max: 300,000 data points in sum</option>
            <option value="3">(4) datasets max: 3,000,000 data points in sum</option>
            <option value="4">(5) datasets max: 30,000,000 data points in sum</option>
            <option value="5">(6) datasets max: 300,000,000 data points in sum</option>
            <option value="6">(7) datasets max: 3,000,000,000 data points in sum</option>
        </param>

        <param name="leafs_allInformation" multiple="false" type="select" label="Select tree analyzes function">
            <option value="1">(1) leaf + inner nodes informations</option>
            <option value="0">(2) only leaf information</option>
        </param>
    </inputs>

    <outputs>
        <data format="tabular" name="outfile_network" label="Network" />
        <data format="tabular" name="outfile_attribute" label="Attributes" />
    </outputs>

    <!-- TODO: the test case is empty; add input parameters and expected outputs -->
    <tests>
        <test>
        </test>
    </tests>

    <help>
.. class:: infomark

CoVennTree compares up to three rooted trees at the same time.

CoVennTree (Comparative weighted Venn Tree) is a software to analyze and compare up to three datasets. Unlike other
methods, CoVennTree correlates data on the leaf level and transfers this information to the root node. CoVennTree works with numbers to compute weighted
Venn diagrams for each node in the graph (rooted tree). Therefore any kind of input data can be processed as long as the data structure will be taken into account.



**Input**

*Input example*


.. image:: $PATH_TO_IMAGES/example1.png
    :height: 430
    :width: 600


*dsv-format: The following table represents the graph.*


=========== ====== ====== ======
#Datasets   set1   set2   set3
=========== ====== ====== ======
"root;"     0      0      0
"root;A;"   10000  0      0
"root;A;C;" 600000 300000 500000
"root;A;D;" 0      100000 200000
"root;A;E;" 800000 0      100000
"root;B;"   10000  20000  50000
=========== ====== ====== ======


-------


**Results**

A specific color is assigned to each dataset in five optional color schemes (see parameter "Select color mode for Venn diagrams").
In this example set1 corresponds to color blue, set2 to red and set3 to yellow.
In order to cover a wide numerical range a non linear transformation function is used.


*Data format \*.sif*

[parent_node] [connected_with] [child_node]


*Data format \*.venn*

[id] [google_url] [id_vds] [Venn_abs_values]


*Output example "leaf + inner nodes informations"*

By selecting "leaf + inner nodes informations" artificial nodes can be inserted.
Artificial nodes will be inserted if inner nodes have values larger than zero.

.. image:: $PATH_TO_IMAGES/venn-graph-off.png
    :height: 358
    :width: 425


-------


*Output example "only leaf information"*

By selecting "only leaf information" only leaf nodes are considered for the computation of weighted Venn diagrams.

.. image:: $PATH_TO_IMAGES/venn-graph-on.png
    :height: 358
    :width: 400



    </help>
    <!-- TODO: the DOI of the citation is still missing -->
    <citations>
        <citation type="doi">

        </citation>
    </citations>
</tool>
<?xml version="1.0"?>
<!-- Source path in the repository: coVennTree/coVennTree/tool_dependencies.xml -->
<tool_dependency>
    <!-- Perl interpreter, provided by another tool shed repository -->
    <package name="perl" version="5.18.1">
        <repository changeset_revision="a1a111b9faa5" name="package_perl_5_18" owner="iuc" prior_installation_required="True" toolshed="https://testtoolshed.g2.bx.psu.edu" />
    </package>
    <!-- Perl environment for coVennTree -->
    <package name="coVennTree" version="1.6">
        <install version="1.0">
            <actions>
                <action type="setup_perl_environment">
                    <repository changeset_revision="a1a111b9faa5" name="package_perl_5_18" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu">
                        <package name="perl" version="5.18.1" />
                    </repository>
                    <!-- allow downloading and installing a Perl package from cpan.org -->
                    <package>http://search.cpan.org/CPAN/authors/id/A/AD/ADAMK/List-MoreUtils-0.33.tar.gz</package>
                </action>
            </actions>
        </install>
        <readme>
        </readme>
    </package>
</tool_dependency>