diff venn_diagram.xml @ 0:3d1097835b2f draft default tip

Imported from capsule None
author jjohnson
date Mon, 22 Sep 2014 11:54:41 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/venn_diagram.xml	Mon Sep 22 11:54:41 2014 -0400
@@ -0,0 +1,207 @@
+<tool name="Venn Diagram" id="ceas_venn" version="0.1.0">
+  <description>Given 2 or 3 interval files, generate a Venn diagram of their intersections</description>
+  <macros>
+    <import>corr_macros.xml</import>
+  </macros>
+  <expand macro="requirements" />
+  <command>
+## Build the venn_diagram.py call. Blank labels fall back to the dataset name.
+#if not bool(str($bf1label))
+#set $bf1label=$bfile1.name
+#end if
+#if not bool(str($bf2label))
+#set $bf2label=$bfile2.name
+#end if
+## BED file 3 is optional. An unset dataset renders as the string "None", so its
+## name must not be read and it must not be handed to the script as a file path.
+#set $has_bfile3 = str($bfile3) != "None"
+#if $has_bfile3 and not bool(str($bf3label))
+#set $bf3label=$bfile3.name
+#end if
+## NOTE(review): the "porportional" input (falsevalue "-n") is not passed below;
+## confirm whether venn_diagram.py expects that flag before wiring it in.
+    venn_diagram.py $bfile1 $bfile2
+#if $has_bfile3
+    $bfile3
+#end if
+    -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label"
+#if $has_bfile3
+    -l "$bf3label"
+#end if
+    &amp;>  $log;
+  </command>
+  <inputs>
+    <!-- Two required BED datasets and an optional third, each with an optional display label. -->
+    <param ftype="interval" format="bed" name="bfile1" type="data" label="BED file 1(total number of lines in the 3 files should not exceed 100,000)"/>
+    <param name="bf1label" type="text" label="Bed file 1 label:" optional="true"/>
+    <param ftype="interval" format="bed" name="bfile2" type="data" label="BED file 2(total number of lines in the 3 files should not exceed 100,000)"/>
+    <param name="bf2label" type="text" label="Bed file 2 label:" optional="true"/>
+    <param ftype="interval" format="bed" name="bfile3" type="data" label="BED file 3(total number of lines in the 3 files should not exceed 100,000)" optional="true"/>
+    <param name="bf3label" type="text" label="Bed file 3 label:" optional="true"/>
+    <!-- Image dimensions; both are restricted to the 500..5000 range by their validators. -->
+    <param name="height" type="integer" label="Pic Height" value="500">
+      <validator type="in_range" max="5000" min="500" message="Pic Height is out of range, Pic Height has to be between 500 to 5000" />
+    </param>
+    <param name="width" type="integer" label="Pic Width" value="500">
+      <validator type="in_range" max="5000" min="500" message="Pic Width is out of range, Pic Width has to be between 500 to 5000" />
+    </param>
+    <param name="title" type="text" label="Diagram title" value="Bed Venn Diagram"/>
+    <!-- The parameter name keeps its original spelling ("porportional") because the tests
+         in this file reference it by name; only the visible label is corrected.
+         NOTE(review): this value is not referenced by the command template in this file. -->
+    <param name="porportional" type="boolean" label="Draw Proportional Diagrams" checked="yes"
+           truevalue="" falsevalue="-n" />
+  </inputs>
+  <outputs>
+    <!-- Venn diagram image, picked up from venn_diagram.png in the job working directory. -->
+    <data name="output" format="png" from_work_dir="venn_diagram.png" />
+    <!-- Receives the stdout/stderr that the command redirects from venn_diagram.py. -->
+    <data name="log" format="txt" label="log file" />
+  </outputs>
+  <expand macro="stdio"/>
+  <configfiles>
+    <configfile name="shscript">
+#!/bin/bash
+## Wrapper script: checks the total line count, the (currently stubbed) format
+## checks and the label/title lengths, then runs venn_diagram.py and copies the
+## resulting PNG to the output dataset.
+## NOTE(review): the command template in this file does not reference this
+## script, and the dollar/gt/ad placeholders are presumably defined by the
+## script_chars include below - confirm both.
+#import os
+
+#include source=$script_chars#
+
+#set $path = os.path.abspath($__app__.config.tool_path)
+
+##Note: the third bed file is optional, if it is not specified, we duplicate
+##the second file as the third argument
+THIRD=$bfile3
+if [ $bfile3 = "None" ]; then ##duplicate the second arg
+    THIRD=$bfile2
+    lines=`wc -l $bfile1 $bfile2 | tail -1 | awk '{print ${dollar}1}'`
+else
+    lines=`wc -l $bfile1 $bfile2 ${dollar}THIRD | tail -1 | awk '{print ${dollar}1}'`
+fi
+
+##check the format of the 3 bed files
+format1='passed' ##`$path/validation/fcfunc.py $bfile1`
+format2='passed' ##`$path/validation/fcfunc.py $bfile2`
+format3='passed' ##`$path/validation/fcfunc.py ${dollar}THIRD`
+
+## Blank labels fall back to the dataset name.
+#if not bool(str($bf1label))
+#set $bf1label=$bfile1.name
+#end if
+#if not bool(str($bf2label))
+#set $bf2label=$bfile2.name
+#end if
+## Only read the third dataset's name when one was supplied; otherwise the
+## second file stands in for it (see THIRD above), so reuse its label.
+#if not bool(str($bf3label))
+#if str($bfile3) != "None"
+#set $bf3label=$bfile3.name
+#else
+#set $bf3label=$bf2label
+#end if
+#end if
+
+#set $tlablelen1=len(str($bf1label))
+#set $tlablelen2=len(str($bf2label))
+#set $tlablelen3=len(str($bf3label))
+#set $tlablelen4=len(str($title))
+
+if [[ ${dollar}lines -gt 100000 ]];then
+    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
+    exit;
+elif [[ ${dollar}format1 != "passed" ]];then
+    echo "BED file 1: ${dollar}format1" ${gt}${ad}2;
+    exit;
+elif [[ ${dollar}format2 != "passed" ]];then
+    echo "BED file 2: ${dollar}format2" ${gt}${ad}2;
+    exit;
+elif [[ ${dollar}format3 != "passed" ]];then
+    echo "BED file 3: ${dollar}format3" ${gt}${ad}2;
+    exit;
+elif [[ $tlablelen1 -gt 255 ]];then
+    echo "Bed file 1 label is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+elif [[ $tlablelen2 -gt 255 ]];then
+    echo "Bed file 2 label is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+elif [[ $tlablelen3 -gt 255 ]];then
+    echo "Bed file 3 label is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+elif [[ $tlablelen4 -gt 255 ]];then
+    echo "Diagram title is too long! 255 characters is at most!" ${gt}${ad}2
+    exit;
+else
+    ## Pass THIRD rather than the raw third dataset so an unset input is never
+    ## handed to the script as the literal path "None".
+    venn_diagram.py $bfile1 $bfile2 ${dollar}THIRD -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label"  ${gt}${ad} $log;
+    cp venn_diagram.png $output;
+fi
+    </configfile>
+  </configfiles>
+<tests>
+  <!-- Three labelled BED inputs at the smallest allowed image size (500 x 500). -->
+  <test maxseconds="3600" name="VennDiagram_1">
+    <param name="bfile1" value="bedfile1.bed" />
+    <param name="bf1label" value="BedFile1" />
+    <param name="bfile2" value="bedfile2.bed" />
+    <param name="bf2label" value="BedFile2" />
+    <param name="bfile3" value="bedfile3.bed" />
+    <param name="bf3label" value="BedFile3" />
+    <param name="height" value="500" />
+    <param name="width" value="500" />
+    <param name="title" value="BedVennDiagram" />
+    <param name="porportional" value="" />
+    <output name="output" file="venndiagram_1/venndiagram_1.png" lines_diff="40" />
+    <!-- The expected log is compared against the "log" output, not the PNG. -->
+    <output name="log" file="venndiagram_1/venndiagram_1.log" lines_diff="200" />
+  </test>
+  <!-- Same inputs at the largest allowed image size (5000 x 5000). -->
+  <test maxseconds="3600" name="VennDiagram_2">
+    <param name="bfile1" value="bedfile1.bed" />
+    <param name="bf1label" value="BedFile1" />
+    <param name="bfile2" value="bedfile2.bed" />
+    <param name="bf2label" value="BedFile2" />
+    <param name="bfile3" value="bedfile3.bed" />
+    <param name="bf3label" value="BedFile3" />
+    <param name="height" value="5000" />
+    <param name="width" value="5000" />
+    <param name="title" value="BedVennDiagram" />
+    <param name="porportional" value="" />
+    <output name="output" file="venndiagram_2/venndiagram_2.png" lines_diff="40" />
+    <!-- The expected log is compared against the "log" output, not the PNG. -->
+    <output name="log" file="venndiagram_2/venndiagram_2.log" lines_diff="200" />
+  </test>
+</tests>
+  <help>
+This tool generates a Venn diagram of the intersections of multiple interval
+files. The original code was written by Jacqueline Wentz and revised by
+Tao Liu. It calculates how many regions overlap between the BED
+files and uses the Google Chart API to draw the final figure.
+
+.. class:: warningmark
+
+**CAUTION:** When three data sets are used and their sizes differ a
+lot, the figure may not be correct, but the numbers are correct.
+
+**CAUTION:** The total number of lines across all the input files should
+not exceed 100,000.
+
+.. class:: warningmark
+
+**NEED IMPROVEMENT**
+
+-----
+
+**Parameters**
+
+- **BED file 1 and 2** are the two BED files to be used to calculate
+  the overlap.
+- **BED file 3** is the third BED file to be used. It's optional.
+- **BED file labels** are the names of the datasets displayed on the diagram;
+  leave blank to use the default name.
+- **Pic Height** is the height of the final image.
+- **Pic Width** is the width of the final image.
+- **Diagram title** is the title of the final image.
+
+-----
+
+**Outputs**
+
+- **PNG file** is the Venn diagram plot. The numbers of overlapped
+  regions are included in the figure.
+- **LOG file** is the job log. If you see errors, please attach this in
+  the bug report.
+
+-----
+
+**Summary**
+
+The regions in the BED files are clustered first.
+For example, suppose we have 5 regions:
+
+bed A:
+(200, 900)
+(1000, 1200)
+
+bed B:
+(100, 300)
+(700, 1100)
+(1400, 1500)
+
+Since they overlap, the tool clusters them into 2 regions:
+(100, 1200) and (1400, 1500), and checks whether each of the
+2 regions is included in bed A or B. So,
+
+(100, 1200) is included in bed A, B
+
+(1400,1500) is included in bed B
+
+Then we draw the venn diagram
+  </help>
+
+</tool>