Mercurial > repos > jjohnson > cistrome_correlation
view venn_diagram.xml @ 0:3d1097835b2f draft default tip
Imported from capsule None
author | jjohnson |
---|---|
date | Mon, 22 Sep 2014 11:54:41 -0400 |
parents | |
children |
line wrap: on
line source
<!--
  Galaxy tool wrapper around venn_diagram.py: takes two or three BED/interval
  datasets and produces a PNG Venn diagram plus a log dataset.
  Shared <requirements> and <stdio> definitions come from corr_macros.xml.
-->
<tool name="Venn Diagram" id="ceas_venn" version="0.1.0">
    <description>Given 2 or 3 intervals, generate a venn diagram of their intersections</description>
    <macros>
        <import>corr_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
      The command is wrapped in CDATA so the shell redirection can be written
      literally; a bare ampersand is not well-formed XML. "> log 2>&1" sends both
      stdout and stderr to the log dataset and, unlike the bash-only "&>" form,
      works in any POSIX shell.
    -->
    <command><![CDATA[
        ## Fall back to the dataset name when a label is left blank.
        #if not bool(str($bf1label))
            #set $bf1label = $bfile1.name
        #end if
        #if not bool(str($bf2label))
            #set $bf2label = $bfile2.name
        #end if
        ## The third BED file is optional. An unset dataset renders as "None",
        ## so only pass the file and its label when one was actually selected.
        #set $has_third = str($bfile3) != 'None'
        #if $has_third and not bool(str($bf3label))
            #set $bf3label = $bfile3.name
        #end if
        venn_diagram.py $bfile1 $bfile2
        #if $has_third
            $bfile3
        #end if
        -H $height -W $width -t "$title"
        -l "$bf1label" -l "$bf2label"
        #if $has_third
            -l "$bf3label"
        #end if
        > $log 2>&1
    ]]></command>
    <inputs>
        <param name="bfile1" type="data" ftype="interval" format="bed" label="BED file 1(total number of lines in the 3 files should not exceed 100,000)"/>
        <param name="bf1label" type="text" label="Bed file 1 label:" optional="true"/>
        <param name="bfile2" type="data" ftype="interval" format="bed" label="BED file 2(total number of lines in the 3 files should not exceed 100,000)"/>
        <param name="bf2label" type="text" label="Bed file 2 label:" optional="true"/>
        <param name="bfile3" type="data" ftype="interval" format="bed" label="BED file 3(total number of lines in the 3 files should not exceed 100,000)" optional="true"/>
        <param name="bf3label" type="text" label="Bed file 3 label:" optional="true"/>
        <param name="height" type="integer" label="Pic Height" value="500">
            <validator type="in_range" max="5000" min="500" message="Pic Height is out of range, Pic Height has to be between 500 to 5000" />
        </param>
        <param name="width" type="integer" label="Pic Width" value="500">
            <validator type="in_range" max="5000" min="500" message="Pic Width is out of range, Pic Width has to be between 500 to 5000" />
        </param>
        <param name="title" type="text" label="Diagram title" value="Bed Venn Diagram"/>
        <!--
          NOTE(review): this switch is not referenced by the command template, so
          its value currently has no effect on the run. Confirm that
          venn_diagram.py accepts the "-n" flag before wiring it in.
        -->
        <param name="porportional" type="boolean" label="Draw Proportional Diagrams" checked="true" truevalue="" falsevalue="-n" />
    </inputs>
    <outputs>
        <!-- The PNG is collected from the job working directory. -->
        <data format="png" name="output" from_work_dir="venn_diagram.png"/>
        <!-- Receives the combined stdout/stderr of the command. -->
        <data format="txt" name="log" label="log file" />
    </outputs>
    <expand macro="stdio"/>
    <!--
      Legacy wrapper script. The command template does not reference $shscript,
      so this file is rendered but not executed by the command as written.
      ${dollar}, ${gt} and ${ad} are placeholders presumably supplied by the
      $script_chars include (not visible in this file).
    -->
    <configfiles>
        <configfile name="shscript">
#!/bin/bash
#import os
#include source=$script_chars#
#set $path = os.path.abspath($__app__.config.tool_path)
##Note: the third bed file is optional, if it is not specified, we duplicate
##the second file as the third argument
THIRD=$bfile3
if [ $bfile3 = "None" ]; then
    ##duplicate the second arg
    THIRD=$bfile2
    lines=`wc -l $bfile1 $bfile2 | tail -1 | awk '{print ${dollar}1}'`
else
    lines=`wc -l $bfile1 $bfile2 ${dollar}THIRD | tail -1 | awk '{print ${dollar}1}'`
fi
##check the format of the 3 bed files
format1='passed' ##`$path/validation/fcfunc.py $bfile1`
format2='passed' ##`$path/validation/fcfunc.py $bfile2`
format3='passed' ##`$path/validation/fcfunc.py ${dollar}THIRD`
#if not bool(str($bf1label))
#set $bf1label=$bfile1.name
#end if
#if not bool(str($bf2label))
#set $bf2label=$bfile2.name
#end if
#if not bool(str($bf3label))
#set $bf3label=$bfile3.name
#end if
#set $tlablelen1=len(str($bf1label))
#set $tlablelen2=len(str($bf2label))
#set $tlablelen3=len(str($bf3label))
#set $tlablelen4=len(str($title))
if [[ ${dollar}lines -gt 100000 ]];then
    echo "Total lines of the files exceed the limit of 100000 lines!" ${gt}${ad}2;
    exit;
elif [[ ${dollar}format1 != "passed" ]];then
    echo "BED file 1: ${dollar}format1" ${gt}${ad}2;
    exit;
elif [[ ${dollar}format2 != "passed" ]];then
    echo "BED file 2: ${dollar}format2" ${gt}${ad}2;
    exit;
elif [[ ${dollar}format3 != "passed" ]];then
    echo "BED file 3: ${dollar}format3" ${gt}${ad}2;
    exit;
elif [[ $tlablelen1 -gt 255 ]];then
    echo "Bed file 1 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit;
elif [[ $tlablelen2 -gt 255 ]];then
    echo "Bed file 2 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit;
elif [[ $tlablelen3 -gt 255 ]];then
    echo "Bed file 3 label is too long! 255 characters is at most!" ${gt}${ad}2
    exit;
elif [[ $tlablelen4 -gt 255 ]];then
    echo "Diagram title is too long! 255 characters is at most!" ${gt}${ad}2
    exit;
else
    venn_diagram.py $bfile1 $bfile2 $bfile3 -H $height -W $width -t "$title" -l "$bf1label" -l "$bf2label" -l "$bf3label" ${gt}${ad} $log;
    cp venn_diagram.png $output;
fi
        </configfile>
    </configfiles>
    <!--
      Both tests use the same three BED inputs and differ only in image size
      (minimum 500 and maximum 5000). Each expected file is matched to the
      output dataset it belongs to: the PNG to "output", the log to "log".
    -->
    <tests>
        <test maxseconds="3600" name="VennDiagram_1">
            <param name="bfile1" value="bedfile1.bed" />
            <param name="bf1label" value="BedFile1" />
            <param name="bfile2" value="bedfile2.bed" />
            <param name="bf2label" value="BedFile2" />
            <param name="bfile3" value="bedfile3.bed" />
            <param name="bf3label" value="BedFile3" />
            <param name="height" value="500" />
            <param name="width" value="500" />
            <param name="title" value="BedVennDiagram" />
            <param name="porportional" value="" />
            <output name="output" file="venndiagram_1/venndiagram_1.png" lines_diff="40" />
            <output name="log" file="venndiagram_1/venndiagram_1.log" lines_diff="200" />
        </test>
        <test maxseconds="3600" name="VennDiagram_2">
            <param name="bfile1" value="bedfile1.bed" />
            <param name="bf1label" value="BedFile1" />
            <param name="bfile2" value="bedfile2.bed" />
            <param name="bf2label" value="BedFile2" />
            <param name="bfile3" value="bedfile3.bed" />
            <param name="bf3label" value="BedFile3" />
            <param name="height" value="5000" />
            <param name="width" value="5000" />
            <param name="title" value="BedVennDiagram" />
            <param name="porportional" value="" />
            <output name="output" file="venndiagram_2/venndiagram_2.png" lines_diff="40" />
            <output name="log" file="venndiagram_2/venndiagram_2.log" lines_diff="200" />
        </test>
    </tests>
    <!-- Help is reStructuredText; it is kept flush-left because indentation is significant there. -->
    <help>
This tool generates a venn diagram of the intersection of multiple intervals files. The original code is written by Jacqueline Wentz and revised by Tao Liu. It will calculate how many regions are overlapped between BED files and use the Google Chart API to draw the final figure.

.. class:: warningmark

**CAUTION:** When three data sets are used, and their sizes differ a lot. The figure may not be correct. But the numbers are correct.

**CAUTION:** The maximum number of lines in all the input files should not exceed 100,000.

.. class:: warningmark

**NEED IMPROVEMENT**

-----

**Parameters**

- **BED file 1 and 2** are the two BED files to be used to calculate the overlap.
- **BED file 3** is the third BED file to be used. It's optional.
- **BED file labels** name of the datasets displayed on the diagram leave blank to use the default name
- **Pic Height** is the height of the final image.
- **Pic Width** is the width of the final image.
- **Diagram title** is the title of the final image.

-----

**Outputs**

- **PNG file** is the Venn diagram plot. The numbers of overlapped regions are included in the figure.
- **LOG file** is the job log. If you see errors, please attach this in the bug report.

-----

**summary**

For the regions in bed file, it will do a cluster first. For example, we have 5 regions

bed A: (200, 900) (1000, 1200)

bed B: (100, 300) (700, 1100) (1400, 1500)

Since they have overlaps, It will cluster them into 2 regions: (100, 1200) (1400, 1500), and see whether each region (of the 2 regions) is included in bed A or B.

So,

(100, 1200) is included in bed A, B

(1400,1500) is included in bed B

Then we draw the venn diagram
    </help>
</tool>