# HG changeset patch
# User peterjc
# Date 1368722229 14400
# Node ID 116ccf1c84d506e604e745709e35907e1853e263
# Parent aefc86eda5f63a8939f5ea19870e922d51e46b87
Uploaded v0.0.5
diff -r aefc86eda5f6 -r 116ccf1c84d5 tools/plotting/venn_list.py
--- a/tools/plotting/venn_list.py Tue Jun 07 17:42:40 2011 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-#!/usr/bin/env python
-"""Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
-
-This script is copyright 2010 by Peter Cock, The James Hutton Institute
-(formerly SCRI), UK. All rights reserved.
-See accompanying text file for licence details (MIT/BSD style).
-
-This is version 0.0.3 of the script.
-"""
-
-
-import sys
-import rpy
-
-def stop_err(msg, error_level=1):
- """Print error message to stdout and quit with given error level."""
- sys.stderr.write("%s\n" % msg)
- sys.exit(error_level)
-
-try:
- import rpy
-except ImportError:
- stop_err("Requires the Python library rpy (to call R)")
-
-try:
- rpy.r.library("limma")
-except:
- stop_err("Requires the R library limma (for vennDiagram function)")
-
-
-if len(sys.argv)-1 not in [7, 10, 13]:
- stop_err("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
-
-all_file, all_type, all_label = sys.argv[1:4]
-set_data = []
-if len(sys.argv)-1 >= 7:
- set_data.append(tuple(sys.argv[4:7]))
-if len(sys.argv)-1 >= 10:
- set_data.append(tuple(sys.argv[7:10]))
-if len(sys.argv)-1 >= 13:
- set_data.append(tuple(sys.argv[10:13]))
-pdf_file = sys.argv[-1]
-n = len(set_data)
-print "Doing %i-way Venn Diagram" % n
-
-def load_ids(filename, filetype):
- if filetype=="tabular":
- for line in open(filename):
- if not line.startswith("#"):
- yield line.rstrip("\n").split("\t",1)[0]
- elif filetype=="fasta":
- for line in open(filename):
- if line.startswith(">"):
- yield line[1:].rstrip("\n").split(None,1)[0]
- elif filetype.startswith("fastq"):
- #Use the Galaxy library not Biopython to cope with CS
- from galaxy_utils.sequence.fastq import fastqReader
- handle = open(filename, "rU")
- for record in fastqReader(handle):
- #The [1:] is because the fastaReader leaves the @ on the identifer.
- yield record.identifier.split()[0][1:]
- handle.close()
- elif filetype=="sff":
- try:
- from Bio.SeqIO import index
- except ImportError:
- stop_err("Require Biopython 1.54 or later (to read SFF files)")
- #This will read the SFF index block if present (very fast)
- for name in index(filename, "sff"):
- yield name
- else:
- stop_err("Unexpected file type %s" % filetype)
-
-def load_ids_whitelist(filename, filetype, whitelist):
- for name in load_ids(filename, filetype):
- if name in whitelist:
- yield name
- else:
- stop_err("Unexpected ID %s in %s file %s" % (name, filetype, filename))
-
-if all_file in ["", "-", '""', '"-"']:
- #Load without white list
- sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
- #Take union
- all = set()
- for s in sets:
- all.update(s)
- print "Inferred total of %i IDs" % len(all)
-else:
- all = set(load_ids(all_file, all_type))
- print "Total of %i IDs" % len(all)
- sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
-
-for s, (f,t,c) in zip(sets, set_data):
- print "%i in %s" % (len(s), c)
-
-#Now call R library to draw simple Venn diagram
-try:
- #Create dummy Venn diagram counts object for three groups
- cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
- rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
- rpy.r('colnames(groups) <- %s' % cols)
- rpy.r('vc <- vennCounts(groups)')
- #Populate the 2^n classes with real counts
- #Don't make any assumptions about the class order
- #print rpy.r('vc')
- for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
- if isinstance(row, int) or isinstance(row, float):
- #Hack for rpy being too clever for single element row
- row = [row]
- names = all
- for wanted, s in zip(row, sets):
- if wanted:
- names = names.intersection(s)
- else:
- names = names.difference(s)
- rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
- #print rpy.r('vc')
- if n == 1:
- #Single circle, don't need to add (Total XXX) line
- names = [c for (t,f,c) in set_data]
- else:
- names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
- rpy.r.assign("names", names)
- rpy.r.assign("colors", ["red","green","blue"][:n])
- rpy.r.pdf(pdf_file, 8, 8)
- rpy.r("""vennDiagram(vc, include="both", names=names,
- main="%s", sub="(Total %i)",
- circle.col=colors)
- """ % (all_label, len(all)))
- rpy.r.dev_off()
-except Exception, exc:
- stop_err( "%s" %str( exc ) )
-rpy.r.quit( save="no" )
-print "Done"
diff -r aefc86eda5f6 -r 116ccf1c84d5 tools/plotting/venn_list.txt
--- a/tools/plotting/venn_list.txt Tue Jun 07 17:42:40 2011 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-Galaxy tool to draw a Venn Diagram with up to 3 sets
-====================================================
-
-This tool is copyright 2011 by Peter Cock, The James Hutton Institute
-(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
-See the licence text below.
-
-This tool is a short Python script (using both the Galaxy and Biopython library
-functions) to extract ID lists from tabular, FASTA, FASTQ or SFF files to build
-sets, which are then drawn using the R limma package function vennDiagram
-(called from Python using rpy).
-
-There are just two files to install:
-
-* venn_list.py (the Python script)
-* venn_list.xml (the Galaxy tool definition)
-
-The suggested location is in the Galaxy folder tools/plotting next to other
-graph drawing tools.
-
-You will also need to modify the tools_conf.xml file to tell Galaxy to offer the
-tool. The suggested location is in the "Graph/Display Data" section. Simply add
-the line:
-
-
-
-You will also need to install Biopython 1.54 or later, and the R/Bioconductor
-pacakge limma. You should already have rpy installed for other Galaxy tools.
-
-
-History
-=======
-
-v0.0.3 - Initial public release.
-
-
-Developers
-==========
-
-This script and related tools are being developed on the following hg branch:
-http://bitbucket.org/peterjc/galaxy-central/src/tools
-
-For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder:
-
-tar -czf venn_list.tar.gz tools/plotting/venn_list.*
-
-Check this worked:
-
-$ tar -tzf venn_list.tar.gz
-tools/plotting/venn_list.py
-tools/plotting/venn_list.txt
-tools/plotting/venn_list.xml
-
-
-Licence (MIT/BSD style)
-=======================
-
-Permission to use, copy, modify, and distribute this software and its
-documentation with or without modifications and for any purpose and
-without fee is hereby granted, provided that any copyright notices
-appear in all copies and that both those copyright notices and this
-permission notice appear in supporting documentation, and that the
-names of the contributors or copyright holders not be used in
-advertising or publicity pertaining to distribution of the software
-without specific prior permission.
-
-THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
-WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
-CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
-OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
-OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
-OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
-OR PERFORMANCE OF THIS SOFTWARE.
diff -r aefc86eda5f6 -r 116ccf1c84d5 tools/plotting/venn_list.xml
--- a/tools/plotting/venn_list.xml Tue Jun 07 17:42:40 2011 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,113 +0,0 @@
-
- from lists
-
-venn_list.py
-#if $universe.type_select=="implicit":
- - -
-#else:
- $main $main.ext
-#end if
-"$main_lab"
-#for $s in $sets:
- $s.set $s.set.ext "$s.lab"
-#end for
-$PDF
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- rpy
- Bio
-
-
-
-
-
-
-
-.. class:: infomark
-
-**TIP:** If your data is in tabular files, the identifier is assumed to be in column one.
-
-**What it does**
-
-Draws Venn Diagram for one, two or three sets (as a PDF file).
-
-You must supply one, two or three sets of identifiers -- corresponding
-to one, two or three circles on the Venn Diagram.
-
-In general you should also give the full list of all the identifiers
-explicitly. This is used to calculate the number of identifers outside
-the circles (and check the identifiers in the other files match up).
-The full list can be omitted by implicitly taking the union of the
-category sets. In this case, the count outside the categories (circles)
-will always be zero.
-
-The identifiers can be taken from the first column of a tabular file
-(e.g. query names in BLAST tabular output, or signal peptide predictions
-after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF).
-
-For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF
-file), and the results of several different read mappings (e.g. to
-different references) as tabular files (filtered to have just the mapped
-reads). You could then show the different mappings (and their overlaps)
-as a Venn Diagram, and the outside count would be the unmapped reads.
-
-**Citations**
-
-The Venn Diagrams are drawn using Gordon Smyth's limma package from
-R/Bioconductor, http://www.bioconductor.org/
-
-The R library is called from Python via rpy, http://rpy.sourceforge.net/
-
-This tool uses Biopython to read SFF files. If you use this tool with
-SFF files in scientific work leading to a publication, please cite the
-Biopython application note:
-
-Cock et al 2009. Biopython: freely available Python tools for computational
-molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
-
-
-
diff -r aefc86eda5f6 -r 116ccf1c84d5 venn_list-c96bef0643dc/tools/plotting/venn_list.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/venn_list-c96bef0643dc/tools/plotting/venn_list.py Thu May 16 12:37:09 2013 -0400
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+"""Plot up to 3-way Venn Diagram using R limma vennDiagram (via rpy)
+
+This script is copyright 2010 by Peter Cock, The James Hutton Institute
+(formerly SCRI), UK. All rights reserved.
+See accompanying text file for licence details (MIT/BSD style).
+
+This is version 0.0.3 of the script.
+"""
+
+
+import sys
+import rpy
+
+def stop_err(msg, error_level=1):
+ """Print error message to stderr and quit with given error level."""
+ sys.stderr.write("%s\n" % msg)
+ sys.exit(error_level)
+
+try: #NOTE(review): "import rpy" already ran unguarded near the top of the script, so a missing rpy fails there before this guard
+ import rpy
+except ImportError:
+ stop_err("Requires the Python library rpy (to call R)")
+
+try:
+ rpy.r.library("limma")
+except: #Bare except: any failure while loading limma is reported as a missing library
+ stop_err("Requires the R library limma (for vennDiagram function)")
+
+
+if len(sys.argv)-1 not in [7, 10, 13]: #3 for the full list, 3 per set, plus the PDF filename
+ stop_err("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
+
+all_file, all_type, all_label = sys.argv[1:4] #Full ID list: filename, file type, plot title
+set_data = [] #One (filename, filetype, caption) tuple per set
+if len(sys.argv)-1 >= 7:
+ set_data.append(tuple(sys.argv[4:7]))
+if len(sys.argv)-1 >= 10:
+ set_data.append(tuple(sys.argv[7:10]))
+if len(sys.argv)-1 >= 13:
+ set_data.append(tuple(sys.argv[10:13]))
+pdf_file = sys.argv[-1]
+n = len(set_data)
+print "Doing %i-way Venn Diagram" % n
+
+def load_ids(filename, filetype): #Generator yielding one identifier per record in the file
+ if filetype=="tabular":
+ for line in open(filename):
+ if not line.startswith("#"):
+ yield line.rstrip("\n").split("\t",1)[0]
+ elif filetype=="fasta":
+ for line in open(filename):
+ if line.startswith(">"):
+ yield line[1:].rstrip("\n").split(None,1)[0]
+ elif filetype.startswith("fastq"):
+ #Use the Galaxy library not Biopython to cope with CS
+ from galaxy_utils.sequence.fastq import fastqReader
+ handle = open(filename, "rU")
+ for record in fastqReader(handle):
+ #The [1:] is because the fastqReader leaves the @ on the identifier.
+ yield record.identifier.split()[0][1:]
+ handle.close()
+ elif filetype=="sff":
+ try:
+ from Bio.SeqIO import index
+ except ImportError:
+ stop_err("Require Biopython 1.54 or later (to read SFF files)")
+ #This will read the SFF index block if present (very fast)
+ for name in index(filename, "sff"):
+ yield name
+ else:
+ stop_err("Unexpected file type %s" % filetype)
+
+def load_ids_whitelist(filename, filetype, whitelist): #Yield IDs from the file, aborting on the first one missing from whitelist
+ for name in load_ids(filename, filetype):
+ if name in whitelist:
+ yield name
+ else:
+ stop_err("Unexpected ID %s in %s file %s" % (name, filetype, filename))
+
+if all_file in ["", "-", '""', '"-"']: #Placeholder values meaning no explicit full list was given
+ #Load without white list
+ sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
+ #Take union
+ all = set() #NOTE: shadows the builtin all()
+ for s in sets:
+ all.update(s)
+ print "Inferred total of %i IDs" % len(all)
+else:
+ all = set(load_ids(all_file, all_type))
+ print "Total of %i IDs" % len(all)
+ sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data] #Aborts on any ID missing from the full list
+
+for s, (f,t,c) in zip(sets, set_data):
+ print "%i in %s" % (len(s), c)
+
+#Now call R library to draw simple Venn diagram
+try:
+ #Create dummy Venn diagram counts object for n groups (one column per set)
+ cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
+ rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
+ rpy.r('colnames(groups) <- %s' % cols)
+ rpy.r('vc <- vennCounts(groups)')
+ #Populate the 2^n classes with real counts
+ #Don't make any assumptions about the class order
+ #print rpy.r('vc')
+ for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
+ if isinstance(row, int) or isinstance(row, float):
+ #Hack for rpy being too clever for single element row
+ row = [row]
+ names = all
+ for wanted, s in zip(row, sets):
+ if wanted:
+ names = names.intersection(s)
+ else:
+ names = names.difference(s)
+ rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
+ #print rpy.r('vc')
+ if n == 1:
+ #Single circle, don't need to add (Total XXX) line
+ names = [c for (t,f,c) in set_data] #NOTE: tuples are unpacked as (f,t,c) elsewhere; only c is used here
+ else:
+ names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
+ rpy.r.assign("names", names)
+ rpy.r.assign("colors", ["red","green","blue"][:n])
+ rpy.r.pdf(pdf_file, 8, 8)
+ rpy.r("""vennDiagram(vc, include="both", names=names,
+ main="%s", sub="(Total %i)",
+ circle.col=colors)
+ """ % (all_label, len(all))) #NOTE(review): all_label is inserted into the R code string unescaped
+ rpy.r.dev_off()
+except Exception, exc:
+ stop_err( "%s" %str( exc ) )
+rpy.r.quit( save="no" )
+print "Done"
diff -r aefc86eda5f6 -r 116ccf1c84d5 venn_list-c96bef0643dc/tools/plotting/venn_list.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/venn_list-c96bef0643dc/tools/plotting/venn_list.txt Thu May 16 12:37:09 2013 -0400
@@ -0,0 +1,75 @@
+Galaxy tool to draw a Venn Diagram with up to 3 sets
+====================================================
+
+This tool is copyright 2011 by Peter Cock, The James Hutton Institute
+(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
+See the licence text below.
+
+This tool is a short Python script (using both the Galaxy and Biopython library
+functions) to extract ID lists from tabular, FASTA, FASTQ or SFF files to build
+sets, which are then drawn using the R limma package function vennDiagram
+(called from Python using rpy).
+
+There are just two files to install:
+
+* venn_list.py (the Python script)
+* venn_list.xml (the Galaxy tool definition)
+
+The suggested location is in the Galaxy folder tools/plotting next to other
+graph drawing tools.
+
+You will also need to modify the tool_conf.xml file to tell Galaxy to offer the
+tool. The suggested location is in the "Graph/Display Data" section. Simply add
+the line:
+
+<tool file="plotting/venn_list.xml" />
+
+You will also need to install Biopython 1.54 or later, and the R/Bioconductor
+package limma. You should already have rpy installed for other Galaxy tools.
+
+
+History
+=======
+
+v0.0.3 - Initial public release.
+
+
+Developers
+==========
+
+This script and related tools are being developed on the following hg branch:
+http://bitbucket.org/peterjc/galaxy-central/src/tools
+
+For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use
+the following command from the Galaxy root folder:
+
+tar -czf venn_list.tar.gz tools/plotting/venn_list.*
+
+Check this worked:
+
+$ tar -tzf venn_list.tar.gz
+tools/plotting/venn_list.py
+tools/plotting/venn_list.txt
+tools/plotting/venn_list.xml
+
+
+Licence (MIT/BSD style)
+=======================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
diff -r aefc86eda5f6 -r 116ccf1c84d5 venn_list-c96bef0643dc/tools/plotting/venn_list.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/venn_list-c96bef0643dc/tools/plotting/venn_list.xml Thu May 16 12:37:09 2013 -0400
@@ -0,0 +1,113 @@
+
+ from lists
+
+venn_list.py
+#if $universe.type_select=="implicit":
+ - -
+#else:
+ $main $main.ext
+#end if
+"$main_lab"
+#for $s in $sets:
+ $s.set $s.set.ext "$s.lab"
+#end for
+$PDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ rpy
+ Bio
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is in tabular files, the identifier is assumed to be in column one.
+
+**What it does**
+
+Draws Venn Diagram for one, two or three sets (as a PDF file).
+
+You must supply one, two or three sets of identifiers -- corresponding
+to one, two or three circles on the Venn Diagram.
+
+In general you should also give the full list of all the identifiers
+explicitly. This is used to calculate the number of identifiers outside
+the circles (and check the identifiers in the other files match up).
+The full list can be omitted by implicitly taking the union of the
+category sets. In this case, the count outside the categories (circles)
+will always be zero.
+
+The identifiers can be taken from the first column of a tabular file
+(e.g. query names in BLAST tabular output, or signal peptide predictions
+after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF).
+
+For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF
+file), and the results of several different read mappings (e.g. to
+different references) as tabular files (filtered to have just the mapped
+reads). You could then show the different mappings (and their overlaps)
+as a Venn Diagram, and the outside count would be the unmapped reads.
+
+**Citations**
+
+The Venn Diagrams are drawn using Gordon Smyth's limma package from
+R/Bioconductor, http://www.bioconductor.org/
+
+The R library is called from Python via rpy, http://rpy.sourceforge.net/
+
+This tool uses Biopython to read SFF files. If you use this tool with
+SFF files in scientific work leading to a publication, please cite the
+Biopython application note:
+
+Cock et al 2009. Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+
+