<?xml version="1.0" encoding="UTF-8"?>
<!--
  Source: venn_diagram.xml @ 0:3d1097835b2f (draft, default, tip)
  Imported from capsule None
  Author: jjohnson
  Date: Mon, 22 Sep 2014 11:54:41 -0400
-->

<tool name="Venn Diagram" id="ceas_venn" version="0.1.0">
  <description>Given 2 or 3 intervals, generate a venn diagram of their intersections</description>
  <macros>
    <import>corr_macros.xml</import>
  </macros>
  <expand macro="requirements" />
  <!--
    Command template (Cheetah) that runs venn_diagram.py.
    * A blank label falls back to the name of the corresponding dataset.
    * BED file 3 is optional. When it is not supplied, BED file 2 is passed
      again as the third positional argument and (unless the user typed a
      label) label 3 reuses label 2.
    * stdout and stderr are both written to the "log" output using a
      POSIX-portable redirect instead of the bash-only '&>' form.
    NOTE(review): the "porportional" input is not passed to venn_diagram.py
    by this template; confirm whether the script expects it.
  -->
  <command>
#if not bool(str($bf1label))
#set $bf1label=$bfile1.name
#end if
#if not bool(str($bf2label))
#set $bf2label=$bfile2.name
#end if
## An unset optional dataset renders as the string "None".
#if str($bfile3) != 'None'
#set $third=$bfile3
#if not bool(str($bf3label))
#set $bf3label=$bfile3.name
#end if
#else
## No third file: duplicate the second file as the third argument.
#set $third=$bfile2
#if not bool(str($bf3label))
#set $bf3label=$bf2label
#end if
#end if
    venn_diagram.py $bfile1 $bfile2 $third -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label" > $log 2>&amp;1
  </command>
  <inputs>
    <!-- Input BED datasets. The first two are required; the third is optional. -->
    <param name="bfile1" type="data" format="bed" label="BED file 1 (total number of lines in the 3 files should not exceed 100,000)"/>
    <param name="bf1label" type="text" optional="true" label="Bed file 1 label:" help="Leave blank to use the dataset name."/>
    <param name="bfile2" type="data" format="bed" label="BED file 2 (total number of lines in the 3 files should not exceed 100,000)"/>
    <param name="bf2label" type="text" optional="true" label="Bed file 2 label:" help="Leave blank to use the dataset name."/>
    <param name="bfile3" type="data" format="bed" optional="true" label="BED file 3 (total number of lines in the 3 files should not exceed 100,000)"/>
    <param name="bf3label" type="text" optional="true" label="Bed file 3 label:" help="Leave blank to use the dataset name."/>
    <!-- Image dimensions; both are restricted to the range 500 to 5000. -->
    <param name="height" type="integer" label="Pic Height" value="500">
      <validator type="in_range" min="500" max="5000" message="Pic Height is out of range, Pic Height has to be between 500 to 5000" />
    </param>
    <param name="width" type="integer" label="Pic Width" value="500">
      <validator type="in_range" min="500" max="5000" message="Pic Width is out of range, Pic Width has to be between 500 to 5000" />
    </param>
    <param name="title" type="text" label="Diagram title" value="Bed Venn Diagram"/>
    <!--
      The parameter name keeps its original spelling ("porportional") so that
      existing tests and saved workflows that reference it keep working; only
      the user-facing label is corrected.
      Renders as an empty string when checked and as "-n" when unchecked.
      NOTE(review): the command template in this file does not reference
      this parameter; confirm whether it should be passed to venn_diagram.py.
    -->
    <param name="porportional" type="boolean" label="Draw Proportional Diagrams" checked="true"
           truevalue="" falsevalue="-n" />
  </inputs>
  <outputs>
    <!-- Venn diagram image, collected from venn_diagram.png in the job working directory. -->
    <data name="output" format="png" from_work_dir="venn_diagram.png" />
    <!-- Text log; the command template redirects the tool's output into this dataset. -->
    <data name="log" format="txt" label="log file" />
  </outputs>
  <expand macro="stdio"/>
  <!--
    Bash script template (Cheetah), made available to templates as $shscript.
    It counts the lines of the input files, enforces the 100,000-line limit
    and the 255-character limit on labels and title, then runs venn_diagram.py
    and copies venn_diagram.png to $output.
    Every validation failure prints a message to stderr and exits with
    status 1 so the failure is visible through the exit code as well.
    When BED file 3 is not supplied, BED file 2 is used as the third argument
    and, unless a label was typed, label 3 reuses label 2.
    NOTE(review): the command template in this file does not reference
    $shscript, so this script appears to be unused; confirm before relying
    on it or removing it.
    NOTE(review): ${dollar}, ${gt} and ${ad} are presumably defined by the
    $script_chars include (expected to render "$", ">" and "&"); confirm
    against corr_macros.xml.
  -->
  <configfiles>
    <configfile name="shscript">
#!/bin/bash
#import os

#include source=$script_chars#

#set $path = os.path.abspath($__app__.config.tool_path)

##Note: the third bed file is optional, if it is not specified, we duplicate
##the second file as the third argument
THIRD=$bfile3
if [ "$bfile3" = "None" ]; then ##duplicate the second arg
    THIRD=$bfile2
    lines=`wc -l $bfile1 $bfile2 | tail -1 | awk '{print ${dollar}1}'`
else
    lines=`wc -l $bfile1 $bfile2 ${dollar}THIRD | tail -1 | awk '{print ${dollar}1}'`
fi

##check the format of the 3 bed files
##(the fcfunc.py checks are disabled, so every file is reported as 'passed')
format1='passed' ##`$path/validation/fcfunc.py $bfile1`
format2='passed' ##`$path/validation/fcfunc.py $bfile2`
format3='passed' ##`$path/validation/fcfunc.py ${dollar}THIRD`

##blank labels fall back to the dataset names
#if not bool(str($bf1label))
#set $bf1label=$bfile1.name
#end if
#if not bool(str($bf2label))
#set $bf2label=$bfile2.name
#end if
#if not bool(str($bf3label))
#if str($bfile3) != 'None'
#set $bf3label=$bfile3.name
#else
##no third file: the second file is duplicated, so reuse its label
#set $bf3label=$bf2label
#end if
#end if

#set $tlablelen1=len(str($bf1label))
#set $tlablelen2=len(str($bf2label))
#set $tlablelen3=len(str($bf3label))
#set $tlablelen4=len(str($title))

if [[ ${dollar}lines -gt 100000 ]];then
    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format1 != "passed" ]];then
    echo "BED file 1: ${dollar}format1" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format2 != "passed" ]];then
    echo "BED file 2: ${dollar}format2" ${gt}${ad}2;
    exit 1;
elif [[ ${dollar}format3 != "passed" ]];then
    echo "BED file 3: ${dollar}format3" ${gt}${ad}2;
    exit 1;
elif [[ $tlablelen1 -gt 255 ]];then
    echo "Bed file 1 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
elif [[ $tlablelen2 -gt 255 ]];then
    echo "Bed file 2 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
elif [[ $tlablelen3 -gt 255 ]];then
    echo "Bed file 3 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
elif [[ $tlablelen4 -gt 255 ]];then
    echo "Diagram title is too long! 255 characters is at most!" ${gt}${ad}2
    exit 1;
else
    venn_diagram.py $bfile1 $bfile2 ${dollar}THIRD -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label"  ${gt}${ad} $log;
    cp venn_diagram.png $output;
fi
    </configfile>
  </configfiles>
<tests>
  <!--
    Both tests use the same three BED files and labels and differ only in the
    requested image size (500x500 versus 5000x5000). Each test checks the PNG
    against the "output" dataset and the log fixture against the "log" dataset.
  -->
  <test maxseconds="3600" name="VennDiagram_1">
    <param name="bfile1" value="bedfile1.bed" />
    <param name="bf1label" value="BedFile1" />
    <param name="bfile2" value="bedfile2.bed" />
    <param name="bf2label" value="BedFile2" />
    <param name="bfile3" value="bedfile3.bed" />
    <param name="bf3label" value="BedFile3" />
    <param name="height" value="500" />
    <param name="width" value="500" />
    <param name="title" value="BedVennDiagram" />
    <param name="porportional" value="" />
    <output name="output" file="venndiagram_1/venndiagram_1.png" lines_diff="40" />
    <output name="log" file="venndiagram_1/venndiagram_1.log" lines_diff="200" />
  </test>
  <test maxseconds="3600" name="VennDiagram_2">
    <param name="bfile1" value="bedfile1.bed" />
    <param name="bf1label" value="BedFile1" />
    <param name="bfile2" value="bedfile2.bed" />
    <param name="bf2label" value="BedFile2" />
    <param name="bfile3" value="bedfile3.bed" />
    <param name="bf3label" value="BedFile3" />
    <param name="height" value="5000" />
    <param name="width" value="5000" />
    <param name="title" value="BedVennDiagram" />
    <param name="porportional" value="" />
    <output name="output" file="venndiagram_2/venndiagram_2.png" lines_diff="40" />
    <output name="log" file="venndiagram_2/venndiagram_2.log" lines_diff="200" />
  </test>
</tests>
  <help>
This tool generates a Venn diagram of the intersections of two or three
interval files. The original code was written by Jacqueline Wentz and revised
by Tao Liu. It calculates how many regions overlap between the BED
files and uses the Google Chart API to draw the final figure.

.. class:: warningmark

**CAUTION:** When three data sets are used and their sizes differ a
lot, the figure may not be correct, but the numbers are correct.

**CAUTION:** The maximum number of lines in all the input files should
not exceed 100,000.

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **BED file 1 and 2** are the two BED files to be used to calculate
  the overlap.
- **BED file 3** is the third BED file to be used. It's optional.
- **BED file labels** are the names of the datasets displayed on the
  diagram; leave them blank to use the default names.
- **Pic Height** is the height of the final image.
- **Pic Width** is the width of the final image.
- **Diagram title** is the title of the final image.

-----

**Outputs**

- **PNG file** is the Venn diagram plot. The numbers of overlapped
  regions are included in the figure.
- **LOG file** is the job log. If you see errors, please attach this in
  the bug report.

-----

**summary**

The regions in the BED files are clustered first.
For example, suppose we have 5 regions:

bed A:
(200, 900)
(1000, 1200)

bed B:
(100, 300)
(700, 1100)
(1400, 1500)

Since they overlap, they are clustered into 2 regions:
(100, 1200) and (1400, 1500). The tool then checks whether each of
these 2 regions is included in bed A or bed B. So,

(100, 1200) is included in bed A, B

(1400,1500) is included in bed B

Then we draw the venn diagram
  </help>

</tool>