changeset 144:307bb7480adc draft default tip

planemo upload
author mzytnicki
date Fri, 20 May 2016 09:25:40 -0400
parents cdfe56de557b
children
files README.txt SMART/Java/Python/CompareOverlappingAdapt.py SMART/Java/Python/CompareOverlappingSmallRef.py SMART/Java/Python/GetReadDistribution.py SMART/Java/Python/GetReadSizes.py SMART/Java/Python/RemoveExons.py SMART/Java/Python/__init__.pyc SMART/Java/Python/changeTagName.py SMART/Java/Python/cleaning/GffCleaner.py SMART/Java/Python/cleaning/GtfCleaner.py SMART/Java/Python/clusterize.py SMART/Java/Python/clusterizeBySlidingWindows.py SMART/Java/Python/compareOverlapping.py SMART/Java/Python/getDistribution.py SMART/Java/Python/misc/Progress.pyc SMART/Java/Python/misc/RPlotter.py SMART/Java/Python/misc/RPlotter.pyc SMART/Java/Python/misc/UnlimitedProgress.pyc SMART/Java/Python/misc/Utils.pyc SMART/Java/Python/misc/__init__.pyc SMART/Java/Python/mySql/MySqlExonTable.pyc SMART/Java/Python/mySql/MySqlTable.pyc SMART/Java/Python/mySql/MySqlTranscriptTable.pyc SMART/Java/Python/mySql/__init__.pyc SMART/Java/Python/ncList/FileSorter.py SMART/Java/Python/ncList/FileSorter.pyc SMART/Java/Python/ncList/NCIndex.pyc SMART/Java/Python/ncList/NCList.pyc SMART/Java/Python/ncList/NCListCursor.pyc SMART/Java/Python/ncList/NCListFilePickle.pyc SMART/Java/Python/ncList/__init__.pyc SMART/Java/Python/structure/Bins.pyc SMART/Java/Python/structure/Interval.pyc SMART/Java/Python/structure/Mapping.pyc SMART/Java/Python/structure/Sequence.pyc SMART/Java/Python/structure/SequenceList.pyc SMART/Java/Python/structure/SubMapping.pyc SMART/Java/Python/structure/Transcript.pyc SMART/Java/Python/structure/TranscriptContainer.pyc SMART/Java/Python/structure/TranscriptList.pyc SMART/Java/Python/structure/__init__.pyc SMART/Java/__init__.pyc SMART/__init__.pyc SMART/galaxy/CleanTranscriptFile.xml SMART/galaxy/Clusterize.xml SMART/galaxy/CollapseReads.xml SMART/galaxy/CompareOverlappingAdapt.xml SMART/galaxy/CompareOverlappingSmallQuery.xml SMART/galaxy/CompareOverlappingSmallRef.xml SMART/galaxy/ConvertTranscriptFile.xml SMART/galaxy/CountReadGCPercent.xml 
SMART/galaxy/GetDifferentialExpression.xml SMART/galaxy/GetFlanking.xml SMART/galaxy/SelectByTag.xml SMART/galaxy/WrappGetLetterDistribution.xml SMART/galaxy/changeGffFeatures.xml SMART/galaxy/changeTagName.xml SMART/galaxy/clusterizeBySlidingWindows.xml SMART/galaxy/computeCoverage.xml SMART/galaxy/coordinatesToSequence.xml SMART/galaxy/getDifference.xml SMART/galaxy/getDistance.xml SMART/galaxy/getDistribution.xml SMART/galaxy/getExons.xml SMART/galaxy/getIntrons.xml SMART/galaxy/getReadDistribution.xml SMART/galaxy/getSizes.xml SMART/galaxy/getWigData.xml SMART/galaxy/getWigDistance.xml SMART/galaxy/getWigProfile.xml SMART/galaxy/mapperAnalyzer.xml SMART/galaxy/mergeSlidingWindowsClusters.xml SMART/galaxy/mergeTranscriptLists.xml SMART/galaxy/modifyGenomicCoordinates.xml SMART/galaxy/modifySequenceList.xml SMART/galaxy/plotCoverage.xml SMART/galaxy/plotTranscriptList.xml SMART/galaxy/removeExonLines.sh SMART/galaxy/removeExonLines.xml SMART/galaxy/removeIntrons.xml SMART/galaxy/restrictFromSize.xml SMART/galaxy/restrictTranscriptList.xml SMART/galaxy/trimSequences.xml commons/__init__.pyc commons/core/__init__.pyc commons/core/checker/RepetException.pyc commons/core/checker/__init__.pyc commons/core/coord/Align.pyc commons/core/coord/Map.pyc commons/core/coord/Range.pyc commons/core/coord/__init__.pyc commons/core/parsing/.BamParser.py.swp commons/core/parsing/AxtParser.pyc commons/core/parsing/BamParser.pyc commons/core/parsing/BedParser.pyc commons/core/parsing/BlastParser.pyc commons/core/parsing/BowtieParser.pyc commons/core/parsing/CoordsParser.pyc commons/core/parsing/ElandParser.pyc commons/core/parsing/ExoParser.pyc commons/core/parsing/FastaParser.pyc commons/core/parsing/FastqParser.pyc commons/core/parsing/GffParser.pyc commons/core/parsing/GtfParser.pyc commons/core/parsing/MapParser.pyc commons/core/parsing/MapperParser.pyc commons/core/parsing/MaqParser.pyc commons/core/parsing/NCListParser.pyc commons/core/parsing/ParserChooser.pyc 
commons/core/parsing/PklParser.pyc commons/core/parsing/PslParser.pyc commons/core/parsing/RmapParser.pyc commons/core/parsing/SamParser.pyc commons/core/parsing/SeqmapParser.pyc commons/core/parsing/SequenceListParser.pyc commons/core/parsing/ShrimpParser.pyc commons/core/parsing/Soap2Parser.pyc commons/core/parsing/SoapParser.pyc commons/core/parsing/TranscriptListParser.pyc commons/core/parsing/WigParser.pyc commons/core/parsing/__init__.pyc commons/core/seq/Bioseq.pyc commons/core/seq/__init__.pyc commons/core/writer/BedWriter.pyc commons/core/writer/CsvWriter.pyc commons/core/writer/EmblWriter.pyc commons/core/writer/FastaWriter.pyc commons/core/writer/FastqWriter.pyc commons/core/writer/GbWriter.pyc commons/core/writer/Gff2Writer.pyc commons/core/writer/Gff3Writer.pyc commons/core/writer/GtfWriter.pyc commons/core/writer/MapWriter.pyc commons/core/writer/MySqlTranscriptWriter.pyc commons/core/writer/SamWriter.pyc commons/core/writer/SequenceListWriter.pyc commons/core/writer/TranscriptListWriter.pyc commons/core/writer/TranscriptWriter.pyc commons/core/writer/UcscWriter.pyc commons/core/writer/WigWriter.pyc commons/core/writer/WriterChooser.pyc commons/core/writer/__init__.pyc test-data/smart_1.bed test-data/smart_clusterize_out_1.gff3
diffstat 143 files changed, 1553 insertions(+), 2456 deletions(-) [+]
line wrap: on
line diff
--- a/SMART/Java/Python/CompareOverlappingAdapt.py	Thu Jan 30 08:55:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2011
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-from optparse import OptionParser
-import os.path
-from SMART.Java.Python.CompareOverlappingSmallQuery import CompareOverlappingSmallQuery
-from SMART.Java.Python.CompareOverlappingSmallRef import CompareOverlappingSmallRef
-from SMART.Java.Python.CompareOverlapping import CompareOverlapping
-
-
-MAX_SIZE = 10000000
-REFERENCE = 0
-QUERY = 1
-
-
-class CompareOverlappingAdapt(object):
-
-	def __init__(self, verbosity):
-		self.verbosity = verbosity
-
-	def setReferenceFile(self, fileName):
-		self.refSize = os.path.getsize(fileName)
-		print "refsize:", self.refSize
-
-	def setQueryFile(self, fileName):
-		self.querySize = os.path.getsize(fileName)
-		print "querysize:", self.querySize
-
-	def run(self, options):
-		object = None
-		if self.refSize < MAX_SIZE:
-			if self.verbosity >= 100:
-				print "Using 'Compare Overlapping Small Reference'"
-			object = CompareOverlappingSmallQuery(options.verbosity)
-		elif self.querySize < MAX_SIZE:
-			if self.verbosity >= 100:
-				print "Using 'Compare Overlapping Small Query'"
-			object = CompareOverlappingSmallRef(options.verbosity)
-		if object != None:
-			object.setQueryFile(options.inputFileName1, options.format1)
-			object.setReferenceFile(options.inputFileName2, options.format2)
-			object.setOutputFile(options.outputFileName)
-			object.includeNotOverlapping(options.notOverlapping)
-			object.setDistance(options.distance)
-			object.setCollinear(options.collinear)
-			object.setAntisense(options.antisense)
-			object.setInvert(options.exclude)
-			object.setMinOverlap(options.minOverlap)
-			object.run()
-			return
-		if self.verbosity >= 100:
-			print "Using 'Compare Overlapping'"
-		object = CompareOverlapping(options.verbosity)
-		object.setInput(options.inputFileName1, options.format1, QUERY)
-		object.setInput(options.inputFileName2, options.format2, REFERENCE)
-		object.setOutput(options.outputFileName)
-		object.getAntisenseOnly(options.antisense)
-		object.getColinearOnly(options.collinear)
-		object.getInvert(options.exclude)
-		object.setMaxDistance(options.distance)
-		object.includeNotOverlapping(options.notOverlapping)
-		object.setMinOverlap(options.minOverlap)
-		object.run()
-
-
-if __name__ == "__main__":
-	
-	description = "Compare Overlapping Adapt v1.0.1: Start the relevant CompareOverlappingX depending on the input data. [Category: Data Comparison]"
-
-	parser = OptionParser(description = description)
-	parser.add_option("-i", "--input1",	        dest="inputFileName1", action="store",			           type="string", help="query input file [compulsory] [format: file in transcript format given by -f]")
-	parser.add_option("-f", "--format1",        dest="format1",		  action="store",			           type="string", help="format of previous file [compulsory] [format: transcript file format]")
-	parser.add_option("-j", "--input2",	        dest="inputFileName2", action="store",			           type="string", help="reference input file [compulsory] [format: file in transcript format given by -g]")
-	parser.add_option("-g", "--format2",        dest="format2",		  action="store",			           type="string", help="format of previous file [compulsory] [format: transcript file format]")
-	parser.add_option("-o", "--output",	        dest="outputFileName", action="store",			           type="string", help="output file [format: output file in GFF3 format]")
-	parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False,				 help="also output not overlapping data [format: bool] [default: false]")
-	parser.add_option("-d", "--distance",		dest="distance",	   action="store",	    default=0,	   type="int",	 help="accept some distance between query and reference [format: int]")
-	parser.add_option("-c", "--collinear",		dest="collinear",	   action="store_true", default=False,			 	 help="provide collinear features [format: bool] [default: false]")
-	parser.add_option("-a", "--antisense",		dest="antisense",	   action="store_true", default=False,			 	 help="provide antisense features [format: bool] [default: false]")
-	parser.add_option("-x", "--exclude",		dest="exclude",		   action="store_true", default=False,			 	 help="invert the match [format: bool] [default: false]")
-	parser.add_option("-m", "--minOverlap",  	dest="minOverlap",     action="store",      default=False, type="int",   help="min. # overlap between query and reference [format: bool] [default: false]")
-	parser.add_option("-v", "--verbosity",      dest="verbosity",	   action="store",      default=1,     type="int",	 help="trace level [format: int]")
-	(options, args) = parser.parse_args()
-
-	coa = CompareOverlappingAdapt(options.verbosity)
-	coa.setQueryFile(options.inputFileName1)
-	coa.setReferenceFile(options.inputFileName2)
-	coa.run(options)
--- a/SMART/Java/Python/CompareOverlappingSmallRef.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/CompareOverlappingSmallRef.py	Fri May 20 09:25:40 2016 -0400
@@ -235,6 +235,7 @@
 	cosr.includeNotOverlapping(options.notOverlapping)
 	cosr.setDistance(options.distance)
 	cosr.setAntisense(options.antisense)
+	cosr.setCollinear(options.collinear)
 	cosr.setInvert(options.exclude)
 	cosr.setMinPercentOverlap(options.pcOverlapQuery, options.pcOverlapRef)
 	cosr.setMinOverlap(options.minOverlap)
--- a/SMART/Java/Python/GetReadDistribution.py	Thu Jan 30 08:55:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,303 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import random, os, glob, subprocess
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.parsing.GffParser import GffParser
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-from commons.core.LoggerFactory import LoggerFactory
-from commons.core.utils.RepetOptionParser import RepetOptionParser
-
-LOG_DEPTH      = "smart"
-DEFAULT_REGION = "_all_"
-MULTIPLE_STR   = {1: "", 1000: " (in kbp)", 1000000: " (in Gbp)"}
-
-class GetReadDistribution(object):
-
-	def __init__(self, verbosity = 0):
-		self.xLab         = ""
-		self.yLab         = "# reads"
-		self.verbosity    = verbosity
-		self.number       = random.randint(0, 100000)
-		self.log          = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
-		self.parsers      = {}
-		self.distribution = {}
-		self.factors      = {}
-		self.regions      = None
-		self.tmpDatName   = None
-		self.tmpRName     = None
-		self.quorum       = 1
-		self.strands      = False
-		self.width        = 800
-		self.height       = 300
-		self.arial        = False
-
-	def setNames(self, names):
-		self.names = names
-
-	def setInputFiles(self, fileNames, format):
-		chooser = ParserChooser(self.verbosity)
-		chooser.findFormat(format)
-		for cpt, fileName in enumerate(fileNames):
-			self.parsers[self.names[cpt]] = chooser.getParser(fileName)
-
-	def setOutputFileName(self, fileName):
-		self.outputFileName = fileName
-
-	def setLabs(self, xLab, yLab):
-		self.xLab = xLab
-		self.yLab = yLab
-
-	def setBinSize(self, binSize):
-		self.binSize = binSize
-
-	def setColors(self, colors):
-		self.colors = colors
-
-	def setFactors(self, factors):
-		if factors == None:
-			self.factors = dict([name, 1.0] for name in self.names)
-		else:
-			self.factors = dict(zip(self.names, factors))
-
-	def setMultiple(self, boolean):
-		self.multiple = boolean
-	
-	def setImageSize(self, width, height):
-		if width != None:
-			self.width = width
-		if height != None:
-			self.height = height
-
-	def setQuorum(self, quorum):
-		self.quorum = quorum
-
-	def setRegionsFile(self, fileName):
-		if fileName != None:
-			self._loadRegions(fileName)
-
-	def setBothStrands(self, strands):
-		self.strands = strands
-
-	def setArial(self, arial):
-		self.arial = arial
-
-	def _checkOptions(self):
-		if not self.parsers:
-			self.logAndRaise("ERROR: Missing input file names")
-
-	def _logAndRaise(self, errorMsg):
-		self.log.error(errorMsg)
-		raise Exception(errorMsg)
-
-	def _loadRegions(self, fileName):
-		self.regions = {}
-		parser       = GffParser(fileName, self.verbosity)
-		for transcript in parser.getIterator():
-			chromosome = transcript.getChromosome()
-			start      = transcript.getStart()
-			end        = transcript.getEnd()
-			name       = transcript.getName()
-			if chromosome not in self.regions:
-				self.regions[chromosome] = {}
-			if start not in self.regions[chromosome]:
-				self.regions[chromosome][start] = {}
-			if end not in self.regions[chromosome][start]:
-				self.regions[chromosome][start][end] = []
-			self.regions[chromosome][start][end].append(name)
-
-	def _getRegions(self, transcript):
-		if self.regions == None:
-			return [DEFAULT_REGION]
-		chromosome = transcript.getChromosome()
-		start      = transcript.getStart()
-		end        = transcript.getEnd()
-		if chromosome not in self.regions:
-			return []
-		names = []
-		for loadedStart in sorted(self.regions[chromosome].keys()):
-			if loadedStart > end:
-				return names
-			for loadedEnd in reversed(sorted(self.regions[chromosome][loadedStart].keys())):
-				if loadedEnd < start:
-					break
-				names.extend(self.regions[chromosome][loadedStart][loadedEnd])
-		return names
-
-	def _parse(self, name):
-		progress = UnlimitedProgress(10000, "Reading file '%s'" % (name), self.verbosity)
-		for transcript in self.parsers[name].getIterator():
-			if transcript.__class__.__name__ == "Mapping":
-				transcript = transcript.getTranscript()
-			regions = self._getRegions(transcript)
-			for region in regions:
-				if region not in self.distribution:
-					self.distribution[region] = {}
-				if name not in self.distribution[region]:
-					self.distribution[region][name] = {}
-				chromosome  = transcript.getChromosome()
-				nbElements  = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1
-				nbElements *= self.factors.get(name, 1)
-				strand      = transcript.getDirection() if self.strands else 1
-				if chromosome not in self.distribution[region][name]:
-					self.distribution[region][name][chromosome] = {}
-				if strand not in self.distribution[region][name][chromosome]:
-					self.distribution[region][name][chromosome][strand] = {}
-				previousBin = None
-				for exon in transcript.getExons():
-					for pos in range(exon.getStart(), exon.getEnd()+1):
-						bin = pos / self.binSize
-						if bin != previousBin:
-							self.distribution[region][name][chromosome][strand][bin] = self.distribution[region][name][chromosome][strand].get(bin, 0) + nbElements
-							previousBin = bin
-			progress.inc()
-		progress.done()
-
-	def _checkQuorum(self, region):
-		if self.quorum == None:
-			return True
-		return max([max([max([max(self.distribution[region][name][chromosome][strand].values()) for strand in self.distribution[region][name][chromosome]]) for chromosome in self.distribution[region][name]]) for name in self.distribution[region]])
-
-	def _writeData(self, region):
-		self.tmpDatName = "tmpFile%d.dat" % (self.number)
-		handle          = open(self.tmpDatName, "w")
-		handle.write("Chr\tPos\tStrand\tCount\tSample\n")
-		for name in self.distribution[region]:
-			for chromosome in sorted(self.distribution[region][name].keys()):
-				for strand in sorted(self.distribution[region][name][chromosome].keys()):
-					for pos in sorted(self.distribution[region][name][chromosome][strand].keys()):
-						handle.write("%s\t%d\t%d\t%d\t\"%s\"\n" % (chromosome, pos * self.binSize, strand, self.distribution[region][name][chromosome][strand].get(pos, 0) * strand, name))
-		handle.close()
-
-	def _findMultiple(self, region):
-		if not self.multiple:
-			return 1
-		maxPosition = max([max([max([max(self.distribution[region][name][chromosome][strand].keys()) for strand in self.distribution[region][name][chromosome]]) for chromosome in self.distribution[region][name]]) for name in self.distribution[region]]) * self.binSize
-		if maxPosition > 2000000:
-			return 1000000
-		elif maxPosition > 2000:
-			return 1000
-		return 1
-
-	def _writeScript(self, region):
-		self.tmpRName = "tmpFile%d.R" % (self.number)
-		fileName      = self.outputFileName if region == DEFAULT_REGION else "%s_%s.png" % (os.path.splitext(self.outputFileName)[0], region)
-		colors        = "scale_fill_brewer(palette=\"Set1\") + scale_color_brewer(palette=\"Set1\")" if self.colors == None else "scale_fill_manual(values = c(%s)) + scale_color_manual(values = c(%s))" % (", ".join(["\"%s\"" % (color) for color in self.colors]), ", ".join(["\"%s\"" % (color) for color in self.colors]))
-		title         = "" if region == DEFAULT_REGION else " + labs(title = \"Distribution of %s\") " % (region)
-		facet         = "Sample ~ Chr" if region == DEFAULT_REGION else "Sample ~ ."
-		handle        = open(self.tmpRName, "w")
-		multiple      = self._findMultiple(region)
-		arial         = ", text = element_text(family=\"Arial\", size=20)" if self.arial else ""
-		if self.arial:
-			handle.write("library(extrafont)\nloadfonts()\n")
-		handle.write("library(ggplot2)\n")
-		handle.write("data <- read.table(\"%s\", header = T)\n" % (self.tmpDatName))
-		handle.write("data$Sample <- factor(data$Sample, levels=c(%s))\n" % (", ".join(["\"%s\"" % (name) for name in self.names])))
-		handle.write("png(\"%s\", width = %d, height = %d)\n" % (fileName, self.width, self.height))
-		handle.write("ggplot(data, aes(x = Pos/%d, y = Count, fill = Sample, color = Sample)) %s + geom_bar(stat = \"identity\") + facet_grid(%s, space=\"free\") + xlab(\"%s%s\") + ylab(\"%s\") + %s + theme(legend.position = \"none\", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()%s)\n" % (multiple, title, facet, self.xLab, MULTIPLE_STR[multiple], self.yLab, colors, arial))
-		handle.write("dev.off()\n")
-
-	def _runR(self):
-		rCommand = os.environ["SMARTRPATH"] if "SMARTRPATH" in os.environ else "R"
-		command  = "\"%s\" CMD BATCH %s" % (rCommand, self.tmpRName)
-		status   = subprocess.call(command, shell=True)
-		if status != 0:
-			raise Exception("Problem with the execution of script file %s, status is: %s" % (self.tmpRName, status))
-
-	def _plot(self):
-		progress = Progress(len(self.distribution), "Plotting data", self.verbosity)
-		for region in self.distribution:
-			if not self._checkQuorum(region):
-				self.log.info("Not displaying '%s' for it contains insufficient data." % (region))
-			else:
-				self._writeData(region)
-				self._writeScript(region)
-				self._runR()
-			progress.inc()
-		progress.done()
-
-	def _cleanFiles(self):
-		for fileName in (self.tmpDatName, self.tmpRName):
-			if fileName != None and os.path.exists(fileName):
-				os.remove(fileName)
-				for otherFileName in glob.glob("%s*" % (fileName)):
-					os.remove(otherFileName)
-
-	def run(self):
-		LoggerFactory.setLevel(self.log, self.verbosity)
-		self._checkOptions()
-		self.log.info("START Get Read Distribution")
-		for name in self.names:
-			self._parse(name)
-		self._plot()
-		self._cleanFiles()
-		self.log.info("END Get Read Distribution")
-
-
-if __name__ == "__main__":
-	description = "Usage: GetReadDistribution.py [options]\n\nGet Read Distribution v1.0.1: Get the distribution of a set of reads. [Category: Personal]\n"
-	epilog = ""
-	parser = RepetOptionParser(description = description, epilog = epilog)
-	parser.add_option("-i", "--input",     dest="inputFileNames",  action="store",      default=None,      type="string", help="input files, separated by commas [compulsory] [format: string]")
-	parser.add_option("-f", "--format",    dest="format",          action="store",      default=None,      type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")
-	parser.add_option("-n", "--names",     dest="names",           action="store",      default=None,      type="string", help="name of the input data, separated by commas [compulsory] [format: string]")
-	parser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")
-	parser.add_option("-s", "--binSize",   dest="binSize",         action="store",      default=10000,     type="int",    help="bin size [format: int] [default: 10000]")
-	parser.add_option("-l", "--xLabel",    dest="xLab",            action="store",      default="",        type="string", help="x-axis label name [format: string]")
-	parser.add_option("-L", "--yLabel",    dest="yLab",            action="store",      default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")
-	parser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")
-	parser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")
-	parser.add_option("-r", "--regions",   dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")
-	parser.add_option("-2", "--strands",   dest="strands",         action="store_true", default=False,                    help="plot negative strands on the negative x-axis [format: boolean] [default: False]")
-	parser.add_option("-m", "--multiple",  dest="multiple",        action="store_true", default=False,                    help="use human readable genomic positions (k, G) [format: boolean] [default: False]")
-	parser.add_option("-q", "--quorum",    dest="quorum",          action="store",      default=1,         type="int",    help="minimum number of intervals to plot a region [format: int] [default: 1]")
-	parser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")
-	parser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")
-	parser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: false]")
-	parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")
-	options = parser.parse_args()[0]
-	iGetReadDistribution = GetReadDistribution(options.verbosity)
-	iGetReadDistribution.setNames(options.names.split(","))
-	iGetReadDistribution.setInputFiles(options.inputFileNames.split(","), options.format)
-	iGetReadDistribution.setOutputFileName(options.outputFileName)
-	iGetReadDistribution.setLabs(options.xLab, options.yLab)
-	iGetReadDistribution.setBinSize(options.binSize)
-	iGetReadDistribution.setColors(None if options.colors == None else options.colors.split(","))
-	iGetReadDistribution.setFactors(None if options.factors == None else map(float, options.factors.split(",")))
-	iGetReadDistribution.setRegionsFile(options.regionsFileName)
-	iGetReadDistribution.setMultiple(options.multiple)
-	iGetReadDistribution.setQuorum(options.quorum)
-	iGetReadDistribution.setImageSize(options.width, options.height)
-	iGetReadDistribution.setBothStrands(options.strands)
-	iGetReadDistribution.setArial(options.arial)
-	iGetReadDistribution.run()
-
--- a/SMART/Java/Python/GetReadSizes.py	Thu Jan 30 08:55:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,282 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-import random, os, glob, subprocess
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.parsing.GffParser import GffParser
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc import Utils
-from commons.core.LoggerFactory import LoggerFactory
-from commons.core.utils.RepetOptionParser import RepetOptionParser
-
-LOG_DEPTH      = "smart"
-DEFAULT_REGION = "_all_"
-
-class GetReadSizes(object):
-
-	def __init__(self, verbosity = 0):
-		self.xLab       = "Size"
-		self.yLab       = "# reads"
-		self.verbosity  = verbosity
-		self.number     = random.randint(0, 100000)
-		self.log        = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self.verbosity)
-		self.parsers    = {}
-		self.sizes      = {}
-		self.factors    = {}
-		self.regions    = None
-		self.percentage = False
-		self.tmpDatName = None
-		self.tmpRName   = None
-		self.width      = 800
-		self.height     = 300
-		self.arial      = False
-
-	def setNames(self, names):
-		self.names = names
-
-	def setInputFiles(self, fileNames, format):
-		chooser = ParserChooser(self.verbosity)
-		chooser.findFormat(format)
-		for cpt, fileName in enumerate(fileNames):
-			self.parsers[self.names[cpt]] = chooser.getParser(fileName)
-		if not self.factors:
-			self.factors = dict([name, 1.0] for name in self.names)
-
-	def setOutputFileName(self, fileName):
-		self.outputFileName = fileName
-
-	def setLabs(self, xLab, yLab):
-		self.xLab = xLab
-		self.yLab = yLab
-
-	def setSizes(self, minSize, maxSize):
-		self.minSize = minSize
-		self.maxSize = maxSize
-
-	def setColors(self, colors):
-		self.colors = colors
-
-	def setFactors(self, factors):
-		if factors:
-			self.factors = dict(zip(self.names, factors))
-
-	def setRegionsFile(self, fileName):
-		if fileName != None:
-			self._loadRegions(fileName)
-
-	def setPercentage(self, percentage):
-		self.percentage = percentage
-		self.xLab = "% reads"
-
-	def setImageSize(self, width, height):
-		if width != None:
-			self.width = width
-		if height != None:
-			self.height = height
-
-	def setArial(self, arial):
-		self.arial = arial
-
-	def _checkOptions(self):
-		if not self.parsers:
-			self.logAndRaise("ERROR: Missing input file names")
-
-	def _logAndRaise(self, errorMsg):
-		self.log.error(errorMsg)
-		raise Exception(errorMsg)
-
-	def _loadRegions(self, fileName):
-		self.regions = {}
-		parser       = GffParser(fileName, self.verbosity)
-		for transcript in parser.getIterator():
-			chromosome = transcript.getChromosome()
-			start      = transcript.getStart()
-			end        = transcript.getEnd()
-			name       = transcript.getName()
-			if chromosome not in self.regions:
-				self.regions[chromosome] = {}
-			if start not in self.regions[chromosome]:
-				self.regions[chromosome][start] = {}
-			if end not in self.regions[chromosome][start]:
-				self.regions[chromosome][start][end] = []
-			self.regions[chromosome][start][end].append(name)
-
-	def _getRegions(self, transcript):
-		if self.regions == None:
-			return [DEFAULT_REGION]
-		chromosome = transcript.getChromosome()
-		start      = transcript.getStart()
-		end        = transcript.getEnd()
-		if chromosome not in self.regions:
-			return []
-		names = []
-		for loadedStart in sorted(self.regions[chromosome].keys()):
-			if loadedStart > end:
-				return names
-			for loadedEnd in reversed(sorted(self.regions[chromosome][loadedStart].keys())):
-				if loadedEnd < start:
-					break
-				names.extend(self.regions[chromosome][loadedStart][loadedEnd])
-		return names
-
-	def _parse(self, name):
-		progress = UnlimitedProgress(10000, "Reading file '%s'" % (name), self.verbosity)
-		for transcript in self.parsers[name].getIterator():
-			if transcript.__class__.__name__ == "Mapping":
-				transcript = transcript.getTranscript()
-			regions = self._getRegions(transcript)
-			for region in regions:
-				if region not in self.sizes:
-					self.sizes[region] = {}
-				if name not in self.sizes[region]:
-					self.sizes[region][name] = {}
-				size = transcript.getSize()
-				if (self.minSize == None or size >= self.minSize) and (self.maxSize == None or size <= self.maxSize):
-					nbElements                     = float(transcript.getTagValue("nbElements")) if "nbElements" in transcript.getTagNames() else 1
-					nbElements                    *= self.factors.get(name, 1)
-					self.sizes[region][name][size] = self.sizes[region][name].get(size, 0) + nbElements
-			progress.inc()
-		progress.done()
-		if self.minSize == None:
-			self.minSize = min([min(self.sizes[region][name].keys()) for name in self.names for region in region])
-		if self.maxSize == None:
-			self.maxSize = max([max(self.sizes[region][name].keys()) for name in self.names for region in region])
-
-	def _checkQuorum(self, region):
-		return (max([sum(self.sizes[region][name].values()) for name in self.sizes[region]]) > 0)
-
-	def _computePercentage(self):
-		for region in self.sizes:
-			for name in self.sizes[region]:
-				if self.sizes[region][name]:
-					sumData = float(sum(self.sizes[region][name].values()))
-					for size in self.sizes[region][name]:
-						self.sizes[region][name][size] = self.sizes[region][name][size] / sumData * 100
-
-	def _writeData(self, region):
-		self.tmpDatName = "tmpFile%d.dat" % (self.number)
-		handle          = open(self.tmpDatName, "w")
-		handle.write("Size\tCount\tSample\n")
-		for name in self.sizes[region]:
-			for size in sorted(self.sizes[region][name].keys()):
-				handle.write("%d\t%d\t\"%s\"\n" % (size, self.sizes[region][name].get(size, 0), name))
-		handle.close()
-
-	def _writeScript(self, region):
-		self.tmpRName = "tmpFile%d.R" % (self.number)
-		fileName      = self.outputFileName if region == DEFAULT_REGION else "%s_%s.png" % (os.path.splitext(self.outputFileName)[0], region)
-		colors        = "scale_fill_brewer(palette=\"Set1\")" if self.colors == None else "scale_fill_manual(values = c(%s))" % (", ".join(["\"%s\"" % (color) for color in self.colors]))
-		title         = "" if region == DEFAULT_REGION else " + labs(title = \"Sizes of %s\")" % (region)
-		handle        = open(self.tmpRName, "w")
-		arial         = ", text = element_text(family=\"Arial\", size=20)" if self.arial else ""
-		if self.arial:
-			handle.write("library(extrafont)\nloadfonts()\n")
-		handle.write("library(ggplot2)\n")
-		handle.write("data <- read.table(\"%s\", header = T)\n" % (self.tmpDatName))
-		handle.write("data$Sample <- factor(data$Sample, levels=c(%s))\n" % (", ".join(["\"%s\"" % (name) for name in self.names])))
-		handle.write("data$Size <- factor(data$Size, levels=c(%s))\n" % (", ".join(["%d" % (size) for size in range(self.minSize, self.maxSize+1)])))
-		handle.write("png(\"%s\", width = %d, height = %d)\n" % (fileName, self.width, self.height))
-		handle.write("ggplot(data, aes(x = Size, y = Count, fill = Size)) %s + geom_bar(stat = \"identity\") + facet_grid(. ~ Sample, space=\"free_x\") + xlab(\"%s\") + ylab(\"%s\") + %s + theme(legend.position = \"none\", panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()%s)\n" % (title, self.xLab, self.yLab, colors, arial))
-		handle.write("dev.off()\n")
-
-	def _runR(self):
-		rCommand = os.environ["SMARTRPATH"] if "SMARTRPATH" in os.environ else "R"
-		command  = "\"%s\" CMD BATCH %s" % (rCommand, self.tmpRName)
-		status   = subprocess.call(command, shell=True)
-		if status != 0:
-			raise Exception("Problem with the execution of script file %s, status is: %s" % (self.tmpRName, status))
-
-	def _plot(self):
-		progress = Progress(len(self.sizes), "Plotting data", self.verbosity)
-		for region in self.sizes:
-			if not self._checkQuorum(region):
-				self.log.info("Not displaying '%s' for it contains no data." % (region))
-			else:
-				self._writeData(region)
-				self._writeScript(region)
-				self._runR()
-			progress.inc()
-		progress.done()
-
-	def _cleanFiles(self):
-		for fileName in (self.tmpDatName, self.tmpRName):
-			if fileName != None and os.path.exists(fileName):
-				os.remove(fileName)
-				for otherFileName in glob.glob("%s*" % (fileName)):
-					os.remove(otherFileName)
-
-	def run(self):
-		LoggerFactory.setLevel(self.log, self.verbosity)
-		self._checkOptions()
-		self.log.info("START Get Read Sizes")
-		for name in self.names:
-			self._parse(name)
-		if self.percentage:
-			self._computePercentage()
-		self._plot()
-		self._cleanFiles()
-		self.log.info("END Get Read Sizes")
-
-
-if __name__ == "__main__":
-	description = "Usage: GetReadSizes.py [options]\n\nGet Read Sizes v1.0.1: Get the sizes of a set of reads. [Category: Personal]\n"
-	epilog = ""
-	parser = RepetOptionParser(description = description, epilog = epilog)
-	parser.add_option("-i", "--input",     dest="inputFileNames",  action="store",      default=None,     type="string", help="input files, separated by commas [compulsory] [format: string]")
-	parser.add_option("-f", "--format",    dest="format",          action="store",      default=None,     type="string", help="format of the input [compulsory] [format: transcript or sequence file format]")
-	parser.add_option("-n", "--names",     dest="names",           action="store",      default=None,     type="string", help="name of the input data, separated by commas [compulsory] [format: string]")
-	parser.add_option("-o", "--output",    dest="outputFileName",  action="store",      default=None,      type="string", help="output file [format: output file in PNG format]")
-	parser.add_option("-s", "--minSize",   dest="minSize",         action="store",      default=None,      type="int",    help="minimum size [format: int]")
-	parser.add_option("-S", "--maxSize",   dest="maxSize",         action="store",      default=None,      type="int",    help="maximum size [format: int]")
-	parser.add_option("-l", "--xLabel",    dest="xLab",            action="store",      default="Size",    type="string", help="x-axis label name [format: string] [default: Size]")
-	parser.add_option("-L", "--yLabel",    dest="yLab",            action="store",      default="# reads", type="string", help="y-axis label name [format: string] [default: Reads]")
-	parser.add_option("-c", "--colors",    dest="colors",          action="store",      default=None,      type="string", help="colors of the bars, separated by commas  [format: string]")
-	parser.add_option("-a", "--factors",   dest="factors",         action="store",      default=None,      type="string", help="normalization factors, separated by commas  [format: string]")
-	parser.add_option("-r", "--regions",   dest="regionsFileName", action="store",      default=None,      type="string", help="regions to plot [format: transcript file in GFF format]")
-	parser.add_option("-p", "--percent",   dest="percentage",      action="store_true", default=False,                    help="compute percentage instead [format: boolean] [default: false]")
-	parser.add_option("-z", "--width",     dest="width",           action="store",      default=800,       type="int",    help="width of the image [format: int] [default: 800]")
-	parser.add_option("-Z", "--height",    dest="height",          action="store",      default=300,       type="int",    help="height of the image [format: int] [default: 300]")
-	parser.add_option("-A", "--arial",     dest="arial",           action="store_true", default=False,                    help="use Arial font [format: boolean] [default: false]")
-	parser.add_option("-v", "--verbosity", dest="verbosity",       action="store",      default=1,         type="int",    help="trace level [format: int]")
-	options = parser.parse_args()[0]
-	iGetReadSizes = GetReadSizes(options.verbosity)
-	iGetReadSizes.setNames(options.names.split(","))
-	iGetReadSizes.setInputFiles(options.inputFileNames.split(","), options.format)
-	iGetReadSizes.setOutputFileName(options.outputFileName)
-	iGetReadSizes.setSizes(options.minSize, options.maxSize)
-	iGetReadSizes.setColors(None if options.colors == None else options.colors.split(","))
-	iGetReadSizes.setFactors(None if options.factors == None else map(float, options.factors.split(",")))
-	iGetReadSizes.setRegionsFile(options.regionsFileName)
-	iGetReadSizes.setPercentage(options.percentage)
-	iGetReadSizes.setImageSize(options.width, options.height)
-	iGetReadSizes.setLabs(options.xLab, options.yLab)
-	iGetReadSizes.setArial(options.arial)
-	iGetReadSizes.run()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/Java/Python/RemoveExons.py	Fri May 20 09:25:40 2016 -0400
@@ -0,0 +1,84 @@
+#! /usr/bin/env python
+#
+# Copyright INRA-URGI 2009-2010
+# 
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software. You can use,
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info".
+# 
+# As a counterpart to the access to the source code and rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty and the software's author, the holder of the
+# economic rights, and the successive licensors have only limited
+# liability.
+# 
+# In this respect, the user's attention is drawn to the risks associated
+# with loading, using, modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean that it is complicated to manipulate, and that also
+# therefore means that it is reserved for developers and experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or
+# data to be ensured and, more generally, to use and operate it in the
+# same conditions as regards security.
+# 
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+#
+from optparse import OptionParser
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.TranscriptWriter import TranscriptWriter
+from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.misc.Progress import Progress
+
+class RemoveExons(object):
+	"""Copy a transcript file to GFF3, calling removeExons() on every transcript on the way."""
+
+	def __init__(self, verbosity):
+		"""Store the trace level, later passed on to ParserChooser, TranscriptWriter and Progress."""
+		self.verbosity = verbosity
+		self.selection = False
+
+	def setInputFile(self, fileName, format):
+		"""Ask ParserChooser for the parser matching 'format' and bind it to 'fileName'."""
+		chooser = ParserChooser(self.verbosity)
+		chooser.findFormat(format)
+		self.parser = chooser.getParser(fileName)
+
+	def setOutputFile(self, fileName):
+		"""Create the TranscriptWriter for 'fileName'; the output format is always "gff3"."""
+		self.writer = TranscriptWriter(fileName, "gff3", self.verbosity)
+		
+	def run(self):
+		"""Call removeExons() on each parsed transcript, hand it to the writer, then write and close the output."""
+		progress = Progress(self.parser.getNbTranscripts(), "Reading input file", self.verbosity)
+		for transcript in self.parser.getIterator():
+			transcript.removeExons()
+			self.writer.addTranscript(transcript)
+			progress.inc()
+		self.writer.write()
+		self.writer.close()
+		progress.done()
+
+if __name__ == "__main__":
+	
+	description = "Remove Exons v1.0.1: Exclude the exons of a set of transcripts. [Category: Data Modification]"
+
+	parser = OptionParser(description = description)
+	parser.add_option("-i", "--input",     dest="inputFileName",  action="store",            type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+	parser.add_option("-f", "--format",    dest="format",         action="store",            type="string", help="format of file [compulsory] [format: transcript file format]")
+	parser.add_option("-o", "--output",    dest="outputFileName", action="store",            type="string", help="output file [format: output file in GFF3 format]")
+	parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1, type="int",	help="trace level [format: int]")
+	(options, args) = parser.parse_args()
+
+	# -i and -f are documented as compulsory: stop with a usage message rather than passing None to ParserChooser
+	if options.inputFileName is None or options.format is None:
+		parser.error("options -i/--input and -f/--format are compulsory")
+
+	remover = RemoveExons(options.verbosity)
+	remover.setInputFile(options.inputFileName, options.format)
+	remover.setOutputFile(options.outputFileName)
+	remover.run()
Binary file SMART/Java/Python/__init__.pyc has changed
--- a/SMART/Java/Python/changeTagName.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/changeTagName.py	Fri May 20 09:25:40 2016 -0400
@@ -83,6 +83,7 @@
     parser.transcriptListParser.close()
 
     writer.write()
+    writer.close()
 
     if options.mysql:
         mysqlWriter.write()
--- a/SMART/Java/Python/cleaning/GffCleaner.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/cleaning/GffCleaner.py	Fri May 20 09:25:40 2016 -0400
@@ -123,7 +123,7 @@
 	def parse(self):
 		progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
 		for cpt, line in enumerate(self.inputHandle):
-			if not line or line[0] == "#": continue
+			if not line or (not line.strip()) or line[0] == "#": continue
 			if line[0] == ">": break
 			parsedLine = ParsedLine(line, cpt)
 			if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
--- a/SMART/Java/Python/cleaning/GtfCleaner.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/cleaning/GtfCleaner.py	Fri May 20 09:25:40 2016 -0400
@@ -96,7 +96,7 @@
 	def parse(self):
 		progress = UnlimitedProgress(100000, "Reading input file", self.verbosity)
 		for cpt, line in enumerate(self.inputHandle):
-			if not line or line[0] == "#": continue
+			if not line or (not line.strip()) or line[0] == "#": continue
 			parsedLine = ParsedLine(line, cpt)
 			if self.acceptedTypes == None or parsedLine.type in self.acceptedTypes:
 				transcriptId = parsedLine.transcriptId
--- a/SMART/Java/Python/clusterize.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/clusterize.py	Fri May 20 09:25:40 2016 -0400
@@ -33,6 +33,7 @@
 
 import os, os.path, random
 from optparse import OptionParser
+from heapq import heappush, heappop
 from commons.core.parsing.ParserChooser import ParserChooser
 from commons.core.writer.Gff3Writer import Gff3Writer
 from SMART.Java.Python.structure.Transcript import Transcript
@@ -44,26 +45,31 @@
 class Clusterize(object):
 
 	def __init__(self, verbosity):
-		self.normalize		 = False
-		self.presorted		 = False
-		self.distance		  = 1
-		self.colinear		  = False
-		self.nbWritten		 = 0
-		self.nbMerges		  = 0
-		self.verbosity		 = verbosity
+		self.parsers           = {}
+		self.sortedFileNames   = {}
+		self.normalize         = False
+		self.presorted         = False
+		self.distance          = 1
+		self.collinear         = False
+		self.nbWritten         = 0
+		self.nbMerges          = 0
+		self.verbosity         = verbosity
 		self.splittedFileNames = {}
+		self.chromosomes       = set()
 
 	def __del__(self):
-		for fileName in self.splittedFileNames.values():
-			os.remove(fileName)
+		for fileName1 in self.splittedFileNames:
+			for fileName2 in self.splittedFileNames[fileName1].values():
+				os.remove(fileName2)
 
-	def setInputFile(self, fileName, format):
+	def setInputFiles(self, fileNames, format):
 		parserChooser = ParserChooser(self.verbosity)
 		parserChooser.findFormat(format)
-		self.parser = parserChooser.getParser(fileName)
-		self.sortedFileName = "%s_sorted_%d.pkl" % (os.path.splitext(fileName)[0], random.randint(1, 100000))
-		if "SMARTTMPPATH" in os.environ:
-			self.sortedFileName = os.path.join(os.environ["SMARTTMPPATH"], os.path.basename(self.sortedFileName))
+		for fileName in fileNames:
+			self.parsers[fileName] = parserChooser.getParser(fileName)
+			self.sortedFileNames[fileName] = "%s_sorted_%d.pkl" % (os.path.splitext(fileName)[0], random.randint(1, 100000))
+			if "SMARTTMPPATH" in os.environ:
+				self.sortedFileNames[fileName] = os.path.join(os.environ["SMARTTMPPATH"], os.path.basename(self.sortedFileNames[fileName]))
 
 	def setOutputFileName(self, fileName, format="gff3", title="S-MART", feature="transcript", featurePart="exon"):
 		writerChooser = WriterChooser()
@@ -76,8 +82,8 @@
 	def setDistance(self, distance):
 		self.distance = distance
 
-	def setColinear(self, colinear):
-		self.colinear = colinear
+	def setColinear(self, collinear):
+		self.collinear = collinear
 
 	def setNormalize(self, normalize):
 		self.normalize = normalize
@@ -85,42 +91,60 @@
 	def setPresorted(self, presorted):
 		self.presorted = presorted
 
-	def _sortFile(self):
+	def _sortFiles(self):
 		if self.presorted:
 			return
-		fs = FileSorter(self.parser, self.verbosity-4)
-		fs.perChromosome(True)
-		fs.setPresorted(self.presorted)
-		fs.setOutputFileName(self.sortedFileName)
-		fs.sort()
-		self.splittedFileNames       = fs.getOutputFileNames()
-		self.nbElementsPerChromosome = fs.getNbElementsPerChromosome()
-		self.nbElements              = fs.getNbElements()
+		for fileName, parser in self.parsers.iteritems():
+			fs = FileSorter(parser, self.verbosity-4)
+			fs.perChromosome(True)
+			fs.setPresorted(self.presorted)
+			fs.setOutputFileName(self.sortedFileNames[fileName])
+			fs.sort()
+			self.splittedFileNames[fileName] = fs.getOutputFileNames()
+			self.chromosomes.update(self.splittedFileNames[fileName].keys())
 		
-	def _iterate(self, chromosome):
-		if chromosome == None:
-			progress = UnlimitedProgress(10000, "Reading input file", self.verbosity)
-			parser   = self.parser
+	def _iterate(self):
+		progress = UnlimitedProgress(10000, "Reading input file", self.verbosity)
+		parsersSets = []
+		self.nbElements = 0
+		if self.chromosomes:
+			for chromosome in sorted(self.chromosomes):
+				parsersSets.append([NCListFileUnpickle(self.splittedFileNames[fileName][chromosome]) for fileName in self.splittedFileNames if chromosome in self.splittedFileNames[fileName]])
 		else:
-			progress = Progress(self.nbElementsPerChromosome[chromosome], "Checking chromosome %s" % (chromosome), self.verbosity)
-			parser   = NCListFileUnpickle(self.splittedFileNames[chromosome], self.verbosity)
-		transcripts = []
-		for newTranscript in parser.getIterator():
-			newTranscripts = []
-			if newTranscript.__class__.__name__ == "Mapping":
-				newTranscript = newTranscript.getTranscript()
-			for oldTranscript in transcripts:
-				if self._checkOverlap(newTranscript, oldTranscript):
-					self._merge(newTranscript, oldTranscript)
-				elif self._checkPassed(newTranscript, oldTranscript):
-					self._write(oldTranscript)
-				else:
-					newTranscripts.append(oldTranscript)
-			newTranscripts.append(newTranscript)
-			transcripts = newTranscripts
-			progress.inc()
-		for transcript in transcripts:
-			self._write(transcript)
+			parsersSets.append(self.parsers.values())
+		for parsers in parsersSets:
+			transcripts = []
+			heap        = []
+			for parser in parsers:
+				iterator = parser.getIterator()
+				for transcript in iterator:
+					if transcript.__class__.__name__ == "Mapping":
+						transcript = transcript.getTranscript()
+					heappush(heap, (transcript.getChromosome(), transcript.getStart(), -transcript.getEnd(), transcript, iterator))
+					break
+			while heap:
+				chromosome, start, end, newTranscript, iterator = heappop(heap)
+				for transcript in iterator:
+					if transcript.__class__.__name__ == "Mapping":
+						transcript = transcript.getTranscript()
+					heappush(heap, (transcript.getChromosome(), transcript.getStart(), -transcript.getEnd(), transcript, iterator))
+					break
+				newTranscripts = []
+				if newTranscript.__class__.__name__ == "Mapping":
+					newTranscript = newTranscript.getTranscript()
+				for oldTranscript in transcripts:
+					if self._checkOverlap(newTranscript, oldTranscript):
+						self._merge(newTranscript, oldTranscript)
+					elif self._checkPassed(newTranscript, oldTranscript):
+						self._write(oldTranscript)
+					else:
+						newTranscripts.append(oldTranscript)
+				newTranscripts.append(newTranscript)
+				transcripts = newTranscripts
+				self.nbElements += 1
+				progress.inc()
+			for transcript in transcripts:
+				self._write(transcript)
 		progress.done()
 
 	def _merge(self, transcript1, transcript2):
@@ -135,7 +159,7 @@
 	def _checkOverlap(self, transcript1, transcript2):
 		if transcript1.getChromosome() != transcript2.getChromosome():
 			return False
-		if self.colinear and transcript1.getDirection() != transcript2.getDirection():
+		if self.collinear and transcript1.getDirection() != transcript2.getDirection():
 			return False
 		if transcript1.getDistance(transcript2) > self.distance:
 			return False
@@ -145,12 +169,8 @@
 		return ((transcript1.getChromosome() != transcript2.getChromosome()) or (transcript1.getDistance(transcript2) > self.distance))
 
 	def run(self):
-		self._sortFile()
-		if self.presorted:
-			self._iterate(None)
-		else:
-			for chromosome in sorted(self.splittedFileNames.keys()):
-				self._iterate(chromosome)
+		self._sortFiles()
+		self._iterate()
 		self.writer.close()
 		if self.verbosity > 0:
 			print "# input:   %d" % (self.nbElements)
@@ -162,21 +182,21 @@
 	description = "Clusterize v1.0.3: clusterize the data which overlap. [Category: Merge]"
 
 	parser = OptionParser(description = description)
-	parser.add_option("-i", "--input",     dest="inputFileName",  action="store",				     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-	parser.add_option("-f", "--format",    dest="format",		 action="store",				     type="string", help="format of file [format: transcript file format]")
-	parser.add_option("-o", "--output",    dest="outputFileName", action="store",				     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
-	parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store",     default="gff",		     type="string", help="output file format [format: transcript file format]")
-	parser.add_option("-c", "--colinear",  dest="colinear",       action="store_true", default=False,				help="merge colinear transcripts only [format: bool] [default: false]")
-	parser.add_option("-d", "--distance",  dest="distance",       action="store",      default=0,     type="int",    help="max. distance between two transcripts to be merged [format: int] [default: 0]")
-	parser.add_option("-n", "--normalize", dest="normalize",      action="store_true", default=False,				help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
-	parser.add_option("-s", "--sorted",    dest="sorted",		 action="store_true", default=False,				help="input is already sorted [format: bool] [default: false]")
-	parser.add_option("-v", "--verbosity", dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
+	parser.add_option("-i", "--inputs",       dest="inputFileNames", action="store",				     type="string", help="input files (separated by commas) [compulsory] [format: string]")
+	parser.add_option("-f", "--format",       dest="format",		 action="store",				     type="string", help="format of file [format: transcript file format]")
+	parser.add_option("-o", "--output",       dest="outputFileName", action="store",				     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
+	parser.add_option("-u", "--outputFormat", dest="outputFormat",   action="store",      default="gff",		        type="string", help="output file format [format: transcript file format]")
+	parser.add_option("-c", "--collinear",    dest="collinear",      action="store_true", default=False,				help="merge collinear transcripts only [format: bool] [default: false]")
+	parser.add_option("-d", "--distance",     dest="distance",       action="store",      default=0,     type="int",    help="max. distance between two transcripts to be merged [format: int] [default: 0]")
+	parser.add_option("-n", "--normalize",    dest="normalize",      action="store_true", default=False,				help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
+	parser.add_option("-s", "--sorted",       dest="sorted",		 action="store_true", default=False,				help="input is already sorted [format: bool] [default: false]")
+	parser.add_option("-v", "--verbosity",    dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
 	(options, args) = parser.parse_args()
 
 	c = Clusterize(options.verbosity)
-	c.setInputFile(options.inputFileName, options.format)
+	c.setInputFiles(options.inputFileNames.split(","), options.format)
 	c.setOutputFileName(options.outputFileName, options.outputFormat)
-	c.setColinear(options.colinear)
+	c.setColinear(options.collinear)
 	c.setDistance(options.distance)
 	c.setNormalize(options.normalize)
 	c.setPresorted(options.sorted)
--- a/SMART/Java/Python/clusterizeBySlidingWindows.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/clusterizeBySlidingWindows.py	Fri May 20 09:25:40 2016 -0400
@@ -28,196 +28,317 @@
 # The fact that you are presently reading this means that you have had
 # knowledge of the CeCILL license and that you accept its terms.
 #
+import re
+from commons.core.writer.WriterChooser import WriterChooser
 """
 Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.
 """
 
-import os, os.path, re
+import os, os.path
 from optparse import OptionParser
 from SMART.Java.Python.structure.Transcript import Transcript
+from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
+from SMART.Java.Python.misc.RPlotter import RPlotter
 from SMART.Java.Python.misc.Progress import Progress
-from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
-from commons.core.parsing.ParserChooser import ParserChooser
-from commons.core.writer.WriterChooser import WriterChooser
 from commons.core.writer.Gff3Writer import Gff3Writer
 
 class ClusterizeBySlidingWindows(object):
 
-	def __init__(self, verbosity = 0):
-		self.verbosity	    = verbosity
-		self.strands		= (0, )
-		self.outputFileName = ''
-		self.defaultValue	= None
-		self.tag 			= None
-		self.valuesPerStrand = {}
+    def __init__(self, verbosity = 0):
+        self.verbosity = verbosity
+        self.strands   = (0, )
+        self.normalize = False
+        self.plot      = None
+        self.excel     = None
+        self.outputFileName = ''
+        self.defaultValue = None
+
+    def __del__(self):
+        pass
+
+    def setInputFile(self, fileName, format):
+        self.parser = TranscriptContainer(fileName, format, self.verbosity)
+
+    def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"):
+        writerChooser = WriterChooser(self.verbosity)
+        writerChooser.findFormat(format)
+        self.writer = writerChooser.getWriter(fileName)
+        self.writer.setTitle(title)
+        self.writer.setFeature(feature)
+        self.writer.setFeaturePart(featurePart)
+#        self.outputFileName = fileName
+#        self.outputFormat = format
+
+    def setWindowSize(self, size):
+        self.size = size
 
-	def setInputFile(self, fileNames, format):
-		parserChooser = ParserChooser(self.verbosity)
-		parserChooser.findFormat(format)
-		if self.outputTagNames is None:
-			if len(fileNames) == 1:
-				self.outputTagNames = ["nbElements"]
-			else:
-				operation = "nbElements" if self.operation is None else self.operation.lower()
-				self.outputTagNames = ["%s%s" % (operation, os.path.splitext(os.path.basename(fileName))[0].title()) for fileName in fileNames]
-		self.parsers = dict(zip(self.outputTagNames, [parserChooser.getParser(fileName) for fileName in fileNames]))
+    def setWindowOverlap(self, overlap):
+        self.overlap = overlap
+
+    def setTag(self, tag):
+        self.tag = tag
+
+    def setOperation(self, operation):
+        self.operation = operation
+
+    def setBothStrands(self, bothStrands):
+        if bothStrands:
+            self.strands = (-1, 1)
 
-	def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"):
-		writerChooser = WriterChooser(self.verbosity)
-		writerChooser.findFormat(format)
-		self.writer = writerChooser.getWriter(fileName)
-		self.writer.setTitle(title)
-		self.writer.setFeature(feature)
-		self.writer.setFeaturePart(featurePart)
+    def setNormalize(self, normalize):
+        self.normalize = normalize
+
+    def setPlot(self, plot):
+        self.plot = plot
 
-	def setWindowSize(self, size):
-		self.size = size
+    def setExcel(self, excel):
+        self.excel = excel
 
-	def setWindowOverlap(self, overlap):
-		self.overlap = overlap
+    def setOutputTag(self, tag):
+        self.outputTagName = tag
+        
+    def setDefaultValue(self, defaultValue):
+        self.defaultValue = defaultValue
 
-	def setTag(self, tag):
-		self.tag = tag
-
-	def setOperation(self, operation):
-		self.operation = operation
+    def checkOptions(self):
+#        if self.operation != None:
+#            raise Exception("Trying to combine the values without specifying tag! Aborting...")
+        if self.operation != None and self.operation not in ("sum", "avg", "med", "min", "max"):
+            raise Exception("Do not understand tag '%s'! Aborting..." % (self.operation))
 
-	def setBothStrands(self, bothStrands):
-		if bothStrands:
-			self.strands = (-1, 1)
+    def getChromosomeSizes(self):
+        self.sizes = {}
+        progress = Progress(self.parser.getNbTranscripts(), "Getting sizes in genome", self.verbosity)
+        for transcript in self.parser.getIterator():
+            self.sizes[transcript.getChromosome()] = max(transcript.getStart(), self.sizes.get(transcript.getChromosome(), 0))
+            progress.inc()
+        progress.done()
 
-	def setOutputTag(self, tags):
-		self.outputTagNames = tags
-		
-	def setDefaultValue(self, defaultValue):
-		self.defaultValue = defaultValue
+    def getBinsFromPos(self, pos):
+        bin = (pos - 1) / (self.size - self.overlap)
+        if bin >= 1 and pos <= bin * (self.size - self.overlap) + self.overlap:
+            return (bin - 1, bin)
+        return (bin, )
+
+    def getPosFromBin(self, bin):
+        return (bin * (self.size - self.overlap) + 1, bin * (self.size - self.overlap) + self.size)
 
-	def checkOptions(self):
-		if self.operation != None and self.operation not in ("sum", "avg", "med", "min", "max"):
-			raise Exception("Do not understand tag '%s'! Aborting..." % (self.operation))
-		if self.size is None:
-			raise Exception("Please mention a windows size! Aborting...")
-		if self.overlap is None:
-			raise Exception("Please mention a windows overlap size! Aborting...")
+    def initializeBins(self):
+        self.binsPerStrand        = {}
+        self.sumsPerStrand        = {}
+        self.valuesPerStrand      = {}
+        self.toBePlottedPerStrand = {}
+        for strand in self.strands:
+            self.binsPerStrand[strand]        = {}
+            self.sumsPerStrand[strand]        = {}
+            self.valuesPerStrand[strand]      = {}
+            self.toBePlottedPerStrand[strand] = {}
+            for chromosome in self.sizes:
+                binRange = range(self.getBinsFromPos(self.sizes[chromosome])[-1] + 1)
+                self.binsPerStrand[strand][chromosome]        = dict([[i, 0]   for i in binRange])
+                self.sumsPerStrand[strand][chromosome]        = dict([[i, 0.0] for i in binRange])
+                self.valuesPerStrand[strand][chromosome]      = dict([[i, []]  for i in binRange])
+                self.toBePlottedPerStrand[strand][chromosome] = dict([[i, 0] for i in binRange])
 
-	def getBinsFromPos(self, pos):
-		bin = (pos - 1) / (self.size - self.overlap)
-		if bin >= 1 and pos <= bin * (self.size - self.overlap) + self.overlap:
-			return (bin - 1, bin)
-		return (bin, )
+    def getNbElements(self, transcript):
+        nbOccurrences = 1 if "nbOccurrences" not in transcript.getTagNames() else transcript.getTagValue("nbOccurrences")
+        nbElements    = 1 if "nbElements"    not in transcript.getTagNames() else transcript.getTagValue("nbElements")
+        nbOccurrences = float(nbOccurrences)
+        nbElements = float(nbElements)
+        nbElements /= float(nbOccurrences)
+        return nbElements
 
-	def getPosFromBin(self, bin):
-		return (bin * (self.size - self.overlap) + 1, bin * (self.size - self.overlap) + self.size)
+    def setBins(self):
+        progress = Progress(self.parser.getNbTranscripts(), "Setting bins", self.verbosity)
+        for transcript in self.parser.getIterator():
+            nbElements = self.getNbElements(transcript)
+            strand     = transcript.getDirection() if len(self.strands) == 2 else 0
+            for bin in self.getBinsFromPos(transcript.getStart()):
+                self.binsPerStrand[strand][transcript.getChromosome()][bin] += nbElements
+                if self.tag != None:
+                    if self.tag not in transcript.getTagNames():
+                        if self.defaultValue is None:
+                            raise Exception("Tag %s undefined in transcript %s" % (self.tag, transcript))
+                        value = self.defaultValue
+                    else:
+                        value = float(transcript.getTagValue(self.tag))
+                    self.sumsPerStrand[strand][transcript.getChromosome()][bin] += value
+                    self.valuesPerStrand[strand][transcript.getChromosome()][bin].append(value)
+            progress.inc()
+        progress.done()
 
-	def getNbElements(self, transcript):
-		nbOccurrences = 1 if "nbOccurrences" not in transcript.getTagNames() else transcript.getTagValue("nbOccurrences")
-		nbElements	= 1 if "nbElements"	not in transcript.getTagNames() else transcript.getTagValue("nbElements")
-		nbOccurrences = float(nbOccurrences)
-		nbElements	= float(nbElements)
-		nbElements   /= float(nbOccurrences)
-		return nbElements
+    def aggregateData(self):
+        if self.operation == "sum":
+            self.computeSumData()
+        elif self.operation == "avg":
+            self.computeAvgData()
+        elif self.operation == "med":
+            self.computeMedData()
+        elif self.operation == "min":
+            self.computeMinData()
+        elif self.operation == "max":
+            self.computeMaxData()
+        elif self.operation == "GCpercent":
+            self.computeGCPercent()
+        else:
+            self.toBePlottedPerStrand = self.binsPerStrand
+
+    def computeSumData(self):
+        self.toBePlottedPerStrand = self.sumsPerStrand
 
-	def setBins(self, name, parser):
-		progress = UnlimitedProgress(10000, "Setting bins of file %s" % (parser.fileName), self.verbosity)
-		for transcript in parser.getIterator():
-			nbElements = self.getNbElements(transcript)
-			strand	   = transcript.getDirection() if len(self.strands) == 2 else 0
-			chromosome = transcript.getChromosome()
-			if strand not in self.valuesPerStrand:
-				self.valuesPerStrand[strand] = {}
-			if chromosome not in self.valuesPerStrand[strand]:
-				self.valuesPerStrand[strand][chromosome] = {}
-			for bin in self.getBinsFromPos(transcript.getStart()):
-				if self.tag is None:
-					value = nbElements
-				else:
-					if tag in transcript.getTagNames():
-						value = float(transcript.getTagValue(tag))
-					else:
-						if self.defaultValue is None:
-							raise Exception("Tag %s undefined in transcript %s" % (tag, transcript))
-						value = self.defaultValue
-				if bin not in self.valuesPerStrand[strand][chromosome]:
-					self.valuesPerStrand[strand][chromosome][bin] = {}
-				if name not in self.valuesPerStrand[strand][chromosome][bin]:
-					self.valuesPerStrand[strand][chromosome][bin][name] = []
-				self.valuesPerStrand[strand][transcript.getChromosome()][bin][name].append(value)
-			progress.inc()
-		progress.done()
+    def computeAvgData(self):
+        for strand in self.strands:
+            for chromosome in self.binsPerStrand[strand]:
+                for bin in self.binsPerStrand[strand][chromosome]:
+                    if self.binsPerStrand[strand][chromosome][bin] != 0:
+                        self.toBePlottedPerStrand[strand][chromosome][bin] = float(self.sumsPerStrand[strand][chromosome][bin]) / self.binsPerStrand[strand][chromosome][bin]
+
+    def computeMedData(self):
+        for strand in self.strands:
+            for chromosome in self.binsPerStrand[strand]:
+                for bin in self.binsPerStrand[strand][chromosome]:
+                    if self.valuesPerStrand[strand][chromosome][bin]:
+                        self.valuesPerStrand[strand][chromosome][bin].sort()
+                        size = len(self.valuesPerStrand[strand][chromosome][bin])
+                        if size % 2 == 1:
+                            self.toBePlottedPerStrand[strand][chromosome][bin] = self.valuesPerStrand[strand][chromosome][bin][(size - 1) / 2]
+                        else:
+                            self.toBePlottedPerStrand[strand][chromosome][bin] = (self.valuesPerStrand[strand][chromosome][bin][size / 2 - 1] + self.valuesPerStrand[strand][chromosome][bin][size / 2]) / 2.0
+
+    def computeMinData(self):
+        for strand in self.strands:
+            for chromosome in self.binsPerStrand[strand]:
+                for bin in self.binsPerStrand[strand][chromosome]:
+                    if self.valuesPerStrand[strand][chromosome][bin]:
+                        self.toBePlottedPerStrand[strand][chromosome][bin] = min(self.valuesPerStrand[strand][chromosome][bin])
 
-	def aggregateData(self, values):
-		if self.operation is None or self.operation == "sum":
-			return sum(values)
-		if self.operation == "avg":
-			return sum(values) / float(len(values))
-		if self.operation == "med":
-			size		 = len(values)
-			sortedValues = sorted(values)
-			if size % 2 == 1:
-				return sorted[(size - 1) / 2]
-			return (sortedValues[size / 2 - 1] + sortedValues[size / 2]) / 2.0
-		if self.operation == "min":
-			return min(values)
-		if self.operation == "max":
-			return max(values)
-		if self.operation == "GCpercent":
-			subSequence = self.valuesPerStrand[strand][chromosome][bin]
-			NPercent = 100 * (values.countNt("N") / float(values.getSize()))
-			if NPercent >= 50:
-				return "NA"
-			return subSequence.getGCpercentageInSequenceWithoutCountNInLength()
+    def computeMaxData(self):
+        for strand in self.strands:
+            for chromosome in self.binsPerStrand[strand]:
+                for bin in self.binsPerStrand[strand][chromosome]:
+                    if self.valuesPerStrand[strand][chromosome][bin]:
+                        self.toBePlottedPerStrand[strand][chromosome][bin] = max(self.valuesPerStrand[strand][chromosome][bin])
+                        
+    def computeGCPercent(self):
+        for strand in self.strands:
+            for chromosome in self.binsPerStrand[strand]:
+                for bin in self.binsPerStrand[strand][chromosome]:
+                    if self.valuesPerStrand[strand][chromosome][bin]:
+                        subSequence = self.valuesPerStrand[strand][chromosome][bin]
+                        NPercent = 100 * (subSequence.countNt("N") / float(subSequence.getSize()))
+                        if NPercent >= 50:
+                            currentGCpercent = "NA"
+                        else:
+                            currentGCpercent = subSequence.getGCpercentageInSequenceWithoutCountNInLength()
+                        
+                        self.toBePlottedPerStrand[strand][chromosome][bin] = currentGCpercent
+        #TODO: see if a map method could be used for the various "compute" methods 
+        #return currentGCpercent, NPercent
+        
+    def plotData(self):
+        if self.plot != None:
+            for strand in self.strands:
+                adjunct = ""
+                if strand != 0:
+                    adjunct = "Strand%d" % (strand)
+                for chromosome in self.toBePlottedPerStrand[strand]:
+                    if len(self.toBePlottedPerStrand[strand][chromosome].keys()) > 0:
+                        plotter = RPlotter(self.plot, self.verbosity)
+                        plotter.setFill(0)
+                        plotter.addLine(self.toBePlottedPerStrand[strand][chromosome], chromosome)
+                        plotter.plot()
 
-	def printRegions(self):
-		cpt		   = 1
-		for strand in self.strands:
-			for chromosome in sorted(self.valuesPerStrand[strand]):
-				for bin in sorted(self.valuesPerStrand[strand][chromosome]):
-					transcript = Transcript()
-					transcript.setName("region%d" % cpt)
-					transcript.setChromosome(chromosome)
-					transcript.setStart(self.getPosFromBin(bin)[0])
-					transcript.setEnd(self.getPosFromBin(bin)[1])
-					transcript.setDirection(1 if strand == 0 else strand)
-					for name in self.valuesPerStrand[strand][chromosome][bin]:
-						transcript.setTagValue(name, self.aggregateData(self.valuesPerStrand[strand][chromosome][bin][name]))
-					self.writer.addTranscript(transcript)
-					cpt += 1
-		self.writer.close()
+    def writeExcel(self):
+        if self.excel != None:
+            excelFile = open(self.excel, "w")
+            for strand in self.strands:
+                maxBin = max([max(self.toBePlottedPerStrand[strand][chromosome].keys()) for chromosome in self.binsPerStrand[strand]])
+                for bin in range(0, maxBin + 1):
+                    excelFile.write(",%d-%d" % self.getPosFromBin(bin))
+                excelFile.write("\n")
+                for chromosome in self.toBePlottedPerStrand[strand]:
+                    excelFile.write("%s" % (chromosome))
+                    for bin in self.toBePlottedPerStrand[strand][chromosome]:
+                        excelFile.write(",%f" % (self.toBePlottedPerStrand[strand][chromosome][bin]))
+                    excelFile.write("\n")
+            excelFile.close()
 
-	def run(self):
-		self.checkOptions()
-		for key, parser in self.parsers.iteritems():
-			self.setBins(key, parser)
-		self.printRegions()
+    def printRegions(self):
+        cpt           = 1
+        tagOp         = "nb"
+        tagName       = "Elements"
+        outputTagName = "nbElements"
+        if self.operation != None:
+            tagOp = self.operation.lower()
+        if self.tag != None:
+            tagName = self.tag.title()
+        if self.outputTagName != None:
+            outputTagName = self.outputTagName
+            
+     
+        #writer = Gff3Writer(self.outputFileName, self.verbosity)
+        
+        for strand in self.strands:
+            for chromosome in self.toBePlottedPerStrand[strand]:
+                for bin in self.toBePlottedPerStrand[strand][chromosome]:
+                    transcript = Transcript()
+                    transcript.setName("region%d" % cpt)
+                    transcript.setChromosome(chromosome)
+                    transcript.setStart(self.getPosFromBin(bin)[0])
+                    transcript.setEnd(self.getPosFromBin(bin)[1])
+                    transcript.setDirection(1 if strand == 0 else strand)
+                    transcript.setTagValue(outputTagName, self.binsPerStrand[strand][chromosome][bin])
+                    transcript.setTagValue("%s%s" % (tagOp, tagName), str(self.toBePlottedPerStrand[strand][chromosome][bin]))
+                    self.writer.addTranscript(transcript)
+                    cpt += 1
+        self.writer.close()
+
+    def run(self):
+        self.checkOptions()
+        self.getChromosomeSizes()
+        self.initializeBins()
+        self.setBins()
+        self.aggregateData()
+        if self.excel:
+            self.writeExcel()
+        if self.plot:
+            self.plotData()
+        self.printRegions()
+
 
 if __name__ == "__main__":
-	
-	# parse command line
-	description = "Clusterize by Sliding Windows v1.0.2: Produces a GFF3 file that clusters a list of transcripts using a sliding window. [Category: Sliding Windows]"
+    
+    # parse command line
+    description = "Clusterize by Sliding Windows v1.0.1: Produces a GFF3 file that clusters a list of transcripts using a sliding window. [Category: Sliding Windows]"
 
-	parser = OptionParser(description = description)
-	parser.add_option("-i", "--input",        dest="inputFileName",  action="store",					 type="string", help="input files, separated by commas [compulsory] [format: string]")
-	parser.add_option("-f", "--inputFormat",  dest="inputFormat",    action="store",					 type="string", help="format of the input file [compulsory] [format: transcript file format]")
-	parser.add_option("-o", "--output",	      dest="outputFileName", action="store",					 type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
-	parser.add_option("-u", "--outputFormat", dest="outputFormat",   action="store",      default="gff", type="string", help="format of the output file [format: transcript file format]")
-	parser.add_option("-s", "--size",         dest="size",	         action="store",					 type="int",	help="size of the regions [compulsory] [format: int]")
-	parser.add_option("-e", "--overlap",	  dest="overlap",        action="store",					 type="int",	help="overlap between two consecutive regions [compulsory] [format: int]")
-	parser.add_option("-g", "--tag",          dest="tag",            action="store",      default=None,  type="string", help="use a given tag as input (instead of summing number of features) [format: string]")	
-	parser.add_option("-r", "--operation",    dest="operation",      action="store",      default=None,  type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")
-	parser.add_option("-d", "--defaultValue", dest="defaultValue",   action="store",					 type="float",  help="default value for input tag [format: float]")
-	parser.add_option("-w", "--write",	      dest="writeTag",       action="store",      default=None,  type="string", help="print the result in the given tags, separated by commas (default usually is 'nbElements') [format: string]")	
-	parser.add_option("-2", "--strands",      dest="strands",		 action="store_true", default=False,                help="consider the two strands separately [format: bool] [default: false]")
-	parser.add_option("-v", "--verbosity",    dest="verbosity",	     action="store",      default=1,	 type="int",	help="trace level [format: int] [default: 1]")
-	(options, args) = parser.parse_args()
+    parser = OptionParser(description = description)
+    parser.add_option("-i", "--input",       dest="inputFileName",  action="store",                     type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
+    parser.add_option("-f", "--inputFormat", dest="inputFormat",    action="store",                     type="string", help="format of the input file [compulsory] [format: transcript file format]")
+    parser.add_option("-o", "--output",      dest="outputFileName", action="store",                     type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
+    parser.add_option("-u", "--outputFormat", dest="outputFormat",  action="store",     default="gff",  type="string", help="format of the output file [format: transcript file format]")
+    parser.add_option("-s", "--size",        dest="size",           action="store",                     type="int",    help="size of the regions [compulsory] [format: int]")
+    parser.add_option("-e", "--overlap",     dest="overlap",        action="store",                     type="int",    help="overlap between two consecutive regions [compulsory] [format: int]")
+    parser.add_option("-m", "--normalize",   dest="normalize",      action="store_true", default=False,                help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
+    parser.add_option("-g", "--tag",         dest="tag",            action="store",      default=None,  type="string", help="use a given tag as input (instead of summing number of features) [format: string]")    
+    parser.add_option("-r", "--operation",   dest="operation",      action="store",      default=None,  type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")
+    parser.add_option("-d", "--defaultValue",dest="defaultValue",   action="store",                     type="float",    help="default value for input tag [format: float]")
+    parser.add_option("-w", "--write",       dest="writeTag",       action="store",      default=None,  type="string", help="print the result in the given tag (default usually is 'nbElements') [format: string]")    
+    parser.add_option("-2", "--strands",     dest="strands",        action="store_true", default=False,                help="consider the two strands separately [format: bool] [default: false]")
+    parser.add_option("-p", "--plot",        dest="plot",           action="store",      default=None,  type="string", help="plot regions to the given file [format: output file in PNG format]")
+    parser.add_option("-x", "--excel",       dest="excel",          action="store",      default=None,  type="string", help="write an Excel file to the given file [format: output file in Excel format]")
+    parser.add_option("-v", "--verbosity",   dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int] [default: 1]")
+    (options, args) = parser.parse_args()
 
-	cbsw = ClusterizeBySlidingWindows(options.verbosity)
-	cbsw.setOperation(options.operation)
-	cbsw.setTag(options.tag)
-	cbsw.setOutputTag(None if options.writeTag is None else options.writeTag.split(","))
-	cbsw.setInputFile(options.inputFileName.split(","), options.inputFormat)
-	cbsw.setOutputFileName(options.outputFileName, options.outputFormat)
-	cbsw.setWindowSize(options.size)
-	cbsw.setWindowOverlap(options.overlap)
-	cbsw.setDefaultValue(options.defaultValue)
-	cbsw.setBothStrands(options.strands)
-	cbsw.run()
+    cbsw = ClusterizeBySlidingWindows(options.verbosity)
+    cbsw.setInputFile(options.inputFileName, options.inputFormat)
+    cbsw.setOutputFileName(options.outputFileName, options.outputFormat)
+    cbsw.setWindowSize(options.size)
+    cbsw.setWindowOverlap(options.overlap)
+    cbsw.setTag(options.tag)
+    cbsw.setDefaultValue(options.defaultValue)
+    cbsw.setOperation(options.operation)
+    cbsw.setOutputTag(options.writeTag)
+    cbsw.setBothStrands(options.strands)
+    cbsw.setPlot(options.plot)
+    cbsw.setExcel(options.excel)
+    cbsw.run()
--- a/SMART/Java/Python/compareOverlapping.py	Thu Jan 30 08:55:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Compare overlap of two transcript lists"""
-import sys
-import os
-from optparse import OptionParser
-from SMART.Java.Python.misc import Utils
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from commons.core.writer.TranscriptWriter import TranscriptWriter
-from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
-from SMART.Java.Python.misc.RPlotter import RPlotter
-from commons.core.writer.Gff3Writer import Gff3Writer
-
-class CompareOverlapping(object):
-
-    def __init__(self):
-        self._options = None
-
-
-    def setAttributesFromCmdLine(self):
-        description = "Compare Overlapping v1.0.3: Get the data which overlap with a reference set. [Category: Data Comparison]"
-
-        parser = OptionParser(description = description)
-        parser.add_option("-i", "--input1",           dest="inputFileName1", action="store",                     type="string", help="input file 1 [compulsory] [format: file in transcript format given by -f]")
-        parser.add_option("-f", "--format1",          dest="format1",        action="store",                     type="string", help="format of file 1 [compulsory] [format: transcript file format]")
-        parser.add_option("-j", "--input2",           dest="inputFileName2", action="store",                     type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]")
-        parser.add_option("-g", "--format2",          dest="format2",        action="store",                     type="string", help="format of file 2 [compulsory] [format: transcript file format]")
-        parser.add_option("-o", "--output",           dest="output",         action="store",      default=None,  type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-        parser.add_option("-S", "--start1",           dest="start1",         action="store",      default=None,  type="int",    help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]")
-        parser.add_option("-s", "--start2",           dest="start2",         action="store",      default=None,  type="int",    help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]")
-        parser.add_option("-U", "--end1",             dest="end1",           action="store",      default=None,  type="int",    help="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]")
-        parser.add_option("-u", "--end2",             dest="end2",           action="store",      default=None,  type="int",    help="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]")
-        parser.add_option("-t", "--intron",           dest="introns",        action="store_true", default=False,                help="also report introns [format: bool] [default: false]")
-        parser.add_option("-E", "--5primeExtension1", dest="fivePrime1",     action="store",      default=None,  type="int",    help="extension towards 5' in file 1 [format: int]")
-        parser.add_option("-e", "--5primeExtension2", dest="fivePrime2",     action="store",      default=None,  type="int",    help="extension towards 5' in file 2 [format: int]")
-        parser.add_option("-N", "--3primeExtension1", dest="threePrime1",    action="store",      default=None,  type="int",    help="extension towards 3' in file 1 [format: int]")
-        parser.add_option("-n", "--3primeExtension2", dest="threePrime2",    action="store",      default=None,  type="int",    help="extension towards 3' in file 2 [format: int]")
-        parser.add_option("-c", "--colinear",         dest="colinear",       action="store_true", default=False,                help="colinear only [format: bool] [default: false]")
-        parser.add_option("-a", "--antisense",        dest="antisense",      action="store_true", default=False,                help="antisense only [format: bool] [default: false]")
-        parser.add_option("-d", "--distance",         dest="distance",       action="store",      default=None,  type="int",    help="accept some distance between query and reference [format: int]")
-        parser.add_option("-k", "--included",         dest="included",       action="store_true", default=False,                help="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]")
-        parser.add_option("-K", "--including",        dest="including",      action="store_true", default=False,                help="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]")
-        parser.add_option("-m", "--minOverlap",       dest="minOverlap",     action="store",      default=1,     type="int",    help="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]")
-        parser.add_option("-p", "--pcOverlap",        dest="pcOverlap",      action="store",      default=None,  type="int",    help="minimum percentage of nucleotides to overlap to declare an overlap [format: int]")
-        parser.add_option("-O", "--notOverlapping",   dest="notOverlapping", action="store_true", default=False,                help="also output not overlapping data [format: bool] [default: false]")
-        parser.add_option("-x", "--exclude",          dest="exclude",        action="store_true", default=False,                help="invert the match [format: bool] [default: false]")
-        parser.add_option("-v", "--verbosity",        dest="verbosity",      action="store",      default=1,     type="int",    help="trace level [format: int]")
-        parser.add_option("-l", "--log",              dest="log",            action="store_true", default=False,                help="write a log file [format: bool] [default: false]")
-        (self._options, args) = parser.parse_args()
-
-
-    def run(self):             
-        logHandle = None
-        if self._options.log:
-            logHandle = open(self._options.output, "w")
-
-        transcriptContainer1 = TranscriptContainer(self._options.inputFileName1, self._options.format1, self._options.verbosity)
-        transcriptContainer2 = TranscriptContainer(self._options.inputFileName2, self._options.format2, self._options.verbosity)
-        writer               = TranscriptWriter(self._options.output, "gff3", self._options.verbosity)
-
-        transcriptListComparator = TranscriptListsComparator(logHandle, self._options.verbosity)
-        transcriptListComparator.restrictToStart(transcriptListComparator.QUERY, self._options.start1)
-        transcriptListComparator.restrictToStart(transcriptListComparator.REFERENCE, self._options.start2)
-        transcriptListComparator.restrictToEnd(transcriptListComparator.QUERY, self._options.end1)
-        transcriptListComparator.restrictToEnd(transcriptListComparator.REFERENCE, self._options.end2)
-        transcriptListComparator.extendFivePrime(transcriptListComparator.QUERY, self._options.fivePrime1)
-        transcriptListComparator.extendFivePrime(transcriptListComparator.REFERENCE, self._options.fivePrime2)
-        transcriptListComparator.extendThreePrime(transcriptListComparator.QUERY, self._options.threePrime1)
-        transcriptListComparator.extendThreePrime(transcriptListComparator.REFERENCE, self._options.threePrime2)
-        transcriptListComparator.acceptIntrons(transcriptListComparator.QUERY, self._options.introns)
-        transcriptListComparator.acceptIntrons(transcriptListComparator.REFERENCE, self._options.introns)
-        transcriptListComparator.getAntisenseOnly(self._options.antisense)
-        transcriptListComparator.getColinearOnly(self._options.colinear)
-        transcriptListComparator.getInvert(self._options.exclude)
-        transcriptListComparator.setMaxDistance(self._options.distance)
-        transcriptListComparator.setMinOverlap(self._options.minOverlap)
-        transcriptListComparator.setPcOverlap(self._options.pcOverlap)
-        transcriptListComparator.setIncludedOnly(self._options.included)
-        transcriptListComparator.setIncludingOnly(self._options.including)
-        transcriptListComparator.includeNotOverlapping(self._options.notOverlapping)
-        transcriptListComparator.computeOdds(True)
-        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, transcriptContainer1)
-        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, transcriptContainer2)
-        transcriptListComparator.setOutputWriter(writer)
-        transcriptListComparator.compareTranscriptList()
-
-        if self._options.log:
-            logHandle.close()
-
-        if not self._options.exclude:
-            odds = transcriptListComparator.getOdds()
-            if self._options.verbosity > 0 and odds:
-                print "min/avg/med/max transcripts: %d/%.2f/%.1f/%d" % Utils.getMinAvgMedMax(odds)
-                
-if __name__ == "__main__":
-    icompareOverlapping = CompareOverlapping()
-    icompareOverlapping.setAttributesFromCmdLine()
-    icompareOverlapping.run()
--- a/SMART/Java/Python/getDistribution.py	Thu Jan 30 08:55:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,291 +0,0 @@
-#! /usr/bin/env python
-#
-# Copyright INRA-URGI 2009-2010
-# 
-# This software is governed by the CeCILL license under French law and
-# abiding by the rules of distribution of free software. You can use,
-# modify and/ or redistribute the software under the terms of the CeCILL
-# license as circulated by CEA, CNRS and INRIA at the following URL
-# "http://www.cecill.info".
-# 
-# As a counterpart to the access to the source code and rights to copy,
-# modify and redistribute granted by the license, users are provided only
-# with a limited warranty and the software's author, the holder of the
-# economic rights, and the successive licensors have only limited
-# liability.
-# 
-# In this respect, the user's attention is drawn to the risks associated
-# with loading, using, modifying and/or developing or reproducing the
-# software by the user in light of its specific status of free software,
-# that may mean that it is complicated to manipulate, and that also
-# therefore means that it is reserved for developers and experienced
-# professionals having in-depth computer knowledge. Users are therefore
-# encouraged to load and test the software's suitability as regards their
-# requirements in conditions enabling the security of their systems and/or
-# data to be ensured and, more generally, to use and operate it in the
-# same conditions as regards security.
-# 
-# The fact that you are presently reading this means that you have had
-# knowledge of the CeCILL license and that you accept its terms.
-#
-"""Get the repartition of some elements in a chromosomes"""
-
-import os
-from optparse import OptionParser
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.structure.Transcript import Transcript
-from commons.core.writer.Gff3Writer import Gff3Writer
-from SMART.Java.Python.misc.RPlotter import RPlotter
-from SMART.Java.Python.misc.Progress import Progress
-from math import *
-
-def divideKeyDict(dictionary, ratio):
-    return dict([(key / ratio, dictionary[key]) for key in dictionary])
-
-
-def setTranscript(chromosome, direction, start, end, name, value):
-    transcript = Transcript()
-    transcript.setChromosome(chromosome)
-    transcript.setDirection(direction)
-    transcript.setStart(start)
-    transcript.setEnd(end)
-    transcript.setName(name)
-    transcript.setTagValue("nbElements", value)
-    return transcript
-
-
-
-if __name__ == "__main__":
-    
-    magnifyingFactor = 1000
-    
-    # parse command line
-    description = "Get Distribution v1.0.1: Get the distribution of the genomic coordinates on a genome. [Category: Visualization]"
-
-    parser = OptionParser(description = description)
-    parser.add_option("-i", "--input",       dest="inputFileName",     action="store",                           type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
-    parser.add_option("-f", "--format",      dest="format",            action="store",                           type="string", help="format of the input file [compulsory] [format: transcript file format]")
-    parser.add_option("-o", "--output",      dest="outputFileName",    action="store",                           type="string", help="output file [compulsory] [format: output file in GFF3 format]")
-    parser.add_option("-r", "--reference",   dest="referenceFileName", action="store",      default=None,        type="string", help="file containing the genome [compulsory] [format: file in FASTA format]")
-    parser.add_option("-n", "--nbBins",      dest="nbBins",            action="store",      default=1000,        type="int",    help="number of bins [default: 1000] [format: int]")
-    parser.add_option("-2", "--bothStrands", dest="bothStrands",       action="store_true", default=False,                      help="plot one curve per strand [format: bool] [default: false]")
-    parser.add_option("-w", "--raw",         dest="raw",               action="store_true", default=False,                      help="plot raw number of occurrences instead of density [format: bool] [default: false]")
-    parser.add_option("-x", "--csv",         dest="csv",               action="store_true", default=False,                      help="write a .csv file [format: bool]")
-    parser.add_option("-c", "--chromosome",  dest="chromosome",        action="store",      default=None,        type="string", help="plot only a chromosome [format: string]")
-    parser.add_option("-s", "--start",       dest="start",             action="store",      default=None,        type="int",    help="start from a given region [format: int]")
-    parser.add_option("-e", "--end",         dest="end",               action="store",      default=None,        type="int",    help="end from a given region [format: int]")
-    parser.add_option("-y", "--yMin",        dest="yMin",              action="store",      default=None,        type="int",    help="minimum value on the y-axis to plot [format: int]")
-    parser.add_option("-Y", "--yMax",        dest="yMax",              action="store",      default=None,        type="int",    help="maximum value on the y-axis to plot [format: int]")
-    parser.add_option("-g", "--gff",         dest="gff",               action="store_true", default=False,                      help="also write GFF3 file [format: bool] [default: false]")
-    parser.add_option("-H", "--height",      dest="height",            action="store",      default=None,        type="int",    help="height of the graphics [format: int] [default: 300]")
-    parser.add_option("-W", "--width",       dest="width",             action="store",      default=None,        type="int",    help="width of the graphics [format: int] [default: 1000]")
-    parser.add_option("-v", "--verbosity",   dest="verbosity",         action="store",      default=1,           type="int",    help="trace level [default: 1] [format: int]")
-    parser.add_option("-l", "--log",         dest="log",               action="store_true", default=False,                      help="write a log file [format: bool]")
-    parser.add_option("-D", "--directory",   dest="working_Dir",       action="store",      default=os.getcwd(), type="string", help="the directory to store the results [format: directory]")
-    (options, args) = parser.parse_args()
-
-    sizes = {}
-    if options.referenceFileName != None:
-        # get the sizes of the chromosomes
-        referenceHandle = open(options.referenceFileName)
-        name            = None
-        size            = 0
-        maxSize         = 0
-        for line in referenceHandle:
-            line = line.strip()
-            if line == "": continue
-            if line[0] == ">":
-                if name != None:
-                    if options.verbosity > 10:
-                        print name
-                    sizes[name] = size
-                    maxSize     = max(maxSize, size)
-                    size        = 0
-                name = line[1:]
-            else:
-                size += len(line)
-        sizes[name] = size
-        maxSize     = max(maxSize, size)
-        if options.verbosity > 1:
-            print "done"
-        start = 0
-        end   = maxSize
-    else:
-        if options.chromosome == None or options.start == None or options.end == None:
-            raise Exception("Missing chromosome or start and end positions, or reference file")
-        maxSize                   = options.end
-        sizes[options.chromosome] = options.end
-        start                     = options.start
-        end                       = options.end
-
-    
-    tmp1      = int(maxSize / float(options.nbBins))
-    tmp2      = 10 ** (len("%d" % (tmp1))-2)
-    sliceSize = int((tmp1 / tmp2) * tmp2)
-    
-    bins      = dict()
-    binsPlus  = dict()
-    binsMinus = dict()
-    for chromosome in sizes:
-        bins[chromosome]      = dict([(i * sliceSize + 1, 0) for i in range(start / sliceSize, sizes[chromosome] / sliceSize + 1)])
-        binsPlus[chromosome]  = dict([(i * sliceSize + 1, 0) for i in range(start / sliceSize, sizes[chromosome] / sliceSize + 1)])
-        binsMinus[chromosome] = dict([(i * sliceSize + 1, 0) for i in range(start / sliceSize, sizes[chromosome] / sliceSize + 1)])
-
-    parser   = TranscriptContainer(options.inputFileName, options.format, options.verbosity)
-    progress = Progress(parser.getNbTranscripts(), "Parsing %s" % (options.inputFileName), options.verbosity)
-    maxSlice = 0
-    # count the number of reads
-    for transcript in parser.getIterator():
-        if options.chromosome == None or (transcript.getChromosome() == options.chromosome and transcript.getStart() >= start and transcript.getStart() <= end):
-            if transcript.getDirection() == 1:
-                binsPlus[transcript.getChromosome()][(transcript.getStart() / sliceSize) * sliceSize + 1] += 1
-            else:
-                binsMinus[transcript.getChromosome()][(transcript.getStart() / sliceSize) * sliceSize + 1] += 1
-            bins[transcript.getChromosome()][(transcript.getStart() / sliceSize) * sliceSize + 1] += 1
-            maxSlice = max(maxSlice, transcript.getStart() / sliceSize)
-        progress.inc()
-    progress.done()
-
-    # compute densities
-    densityPlus = dict()
-    for chromosome in bins:
-        densityPlus[chromosome] = dict([(bin, 0) for bin in binsPlus[chromosome]])
-        for bin in binsPlus[chromosome]:
-            densityPlus[chromosome][bin] = float(binsPlus[chromosome][bin]) / sliceSize * magnifyingFactor
-        # correct densities for first and last bins
-        if start % sliceSize != 0:
-            densityPlus[chromosome][(start / sliceSize) * sliceSize + 1] = float(binsPlus[chromosome][(start / sliceSize) * sliceSize + 1]) / (sliceSize - (start % sliceSize)) * magnifyingFactor
-        if sizes[chromosome] % sliceSize != 0:
-            densityPlus[chromosome][(sizes[chromosome] / sliceSize) * sliceSize + 1] = float(binsPlus[chromosome][(sizes[chromosome] / sliceSize) * sliceSize + 1]) / (sizes[chromosome] % sliceSize) * magnifyingFactor
-    densityMinus = dict()
-    for chromosome in binsMinus:
-        densityMinus[chromosome] = dict([(bin, 0) for bin in binsMinus[chromosome]])
-        for bin in binsMinus[chromosome]:
-            densityMinus[chromosome][bin] = float(binsMinus[chromosome][bin]) / sliceSize * magnifyingFactor
-        # correct densities for first and last bins
-        if start % sliceSize != 0:
-            densityMinus[chromosome][(start / sliceSize) * sliceSize + 1] = float(binsMinus[chromosome][(start / sliceSize) * sliceSize + 1]) / (sliceSize - (start % sliceSize)) * magnifyingFactor
-        if sizes[chromosome] % sliceSize != 0:
-            densityMinus[chromosome][(sizes[chromosome] / sliceSize) * sliceSize + 1] = float(binsMinus[chromosome][(sizes[chromosome] / sliceSize) * sliceSize + 1]) / (sizes[chromosome] % sliceSize) * magnifyingFactor
-    density = dict()
-    for chromosome in bins:
-        density[chromosome] = dict([(bin, 0) for bin in bins[chromosome]])
-        for bin in bins[chromosome]:
-            density[chromosome][bin] = densityPlus[chromosome][bin] + densityMinus[chromosome][bin]
-
-    for chromosome in densityMinus:
-        for bin in densityMinus[chromosome]:
-            densityMinus[chromosome][bin] *= -1
-        for bin in binsMinus[chromosome]:
-            binsMinus[chromosome][bin] *= -1
-
-    for chromosome in density:
-        maxX = max(bins[chromosome].keys())
-        if maxX <= 1000:
-            unit  = "nt."
-            ratio = 1.0
-        elif maxX <= 1000000:
-            unit  = "kb"
-            ratio = 1000.0
-        else:
-            unit  = "Mb"
-            ratio = 1000000.0
-        outputFileName = "%s_%s" % (options.outputFileName, chromosome)
-        if options.start != None and options.end != None:
-            outputFileName += ":%d-%d" % (options.start, options.end)
-        outputFileName += ".png"
-        plotter = RPlotter(outputFileName, options.verbosity)
-        plotter.setXLabel("Position on %s (in %s)" % (chromosome.replace("_", " "), unit))
-        plotter.setYLabel("# reads")
-        if options.bothStrands:
-            plotter.setImageSize(1000, 300)
-        else:
-            plotter.setImageSize(1000, 200)
-        if options.height != None:
-            plotter.setHeight(options.height)
-        if options.width != None:
-            plotter.setWidth(options.width)
-        if options.yMax != None:
-            plotter.setMinimumY(options.yMin)
-        if options.yMax != None:
-            plotter.setMaximumY(options.yMax)
-        if options.bothStrands :
-            if options.raw:
-                plotter.addLine(divideKeyDict(binsPlus[chromosome], ratio))
-            else:
-                plotter.addLine(divideKeyDict(densityPlus[chromosome], ratio))
-            if options.raw:
-                plotter.addLine(divideKeyDict(binsMinus[chromosome], ratio))
-            else:
-                plotter.addLine(divideKeyDict(densityMinus[chromosome], ratio))
-        else:
-            if options.raw:
-                plotter.addLine(divideKeyDict(bins[chromosome], ratio))
-            else:
-                plotter.addLine(divideKeyDict(density[chromosome], ratio))
-        plotter.plot()
-        
-    if options.csv:
-        outputFileName = "%s" % (options.outputFileName)
-        if options.chromosome != None:
-            outputFileName += "_%s" % (options.chromosome)
-        if options.start != None and options.end != None:
-            outputFileName += ":%d-%d" % (options.start, options.end)
-        outputFileName += ".csv"
-        csvHandle = open(outputFileName, "w")
-        for slice in range(start / sliceSize, maxSlice + 1):
-            csvHandle.write(";%d-%d" % (slice * sliceSize + 1, (slice+1) * sliceSize))
-        csvHandle.write("\n")
-        if options.bothStrands:
-            for chromosome in densityPlus:
-                if len(densityPlus[chromosome]) > 0:
-                    csvHandle.write("%s [+]" % (chromosome))
-                    for slice in sorted(densityPlus[chromosome].keys()):
-                        csvHandle.write(";%.2f" % (densityPlus[chromosome][slice]))
-                    csvHandle.write("\n")            
-                if len(densityMinus[chromosome]) > 0:
-                    csvHandle.write("%s [-]" % (chromosome))
-                    for slice in sorted(densityPlus[chromosome].keys()):
-                        csvHandle.write(";%.2f" % (-densityMinus[chromosome][slice]))
-                    csvHandle.write("\n")            
-        else:
-            for chromosome in density:
-                if len(density[chromosome]) > 0:
-                    csvHandle.write(chromosome)
-                    for slice in sorted(density[chromosome].keys()):
-                        csvHandle.write(";%.2f" % (density[chromosome][slice]))
-                    csvHandle.write("\n")
-        csvHandle.close()
-             
-    if options.gff:
-        chromosome = "" if options.chromosome == None                         else options.chromosome.capitalize()
-        start      = "" if options.start      == None                         else "%d" % (options.start)
-        end        = "" if options.end        == None                         else "%d" % (options.end)
-        link1      = "" if options.start      == None and options.end == None else ":"
-        link2      = "" if options.start      == None and options.end == None else "-"
-        writer     = Gff3Writer("%s%s%s%s%s.gff3" % (options.outputFileName, link1, start, link2, end), options.verbosity)
-        cpt = 1
-        if options.raw:
-            valuesPlus  = binsPlus
-            valuesMinus = binsMinus
-            values      = bins
-        else:
-            valuesPlus  = densityPlus
-            valuesMinus = densityMinus
-            values      = density
-        if options.bothStrands:
-            for chromosome in values:
-                for slice in valuesPlus[chromosome]:
-                    writer.addTranscript(setTranscript(chromosome, 1, slice, slice + sliceSize, "region%d" % (cpt), valuesPlus[chromosome][slice]))
-                    cpt += 1
-                for slice in valuesMinus[chromosome]:
-                    writer.addTranscript(setTranscript(chromosome, -1, slice, slice + sliceSize, "region%d" % (cpt), - valuesMinus[chromosome][slice]))
-                    cpt += 1
-        else:
-            for chromosome in values:
-                for slice in values[chromosome]:
-                    writer.addTranscript(setTranscript(chromosome, 1, slice, slice + sliceSize, "region%d" % (cpt), values[chromosome][slice]))
-                    cpt += 1
-        writer.write()
-
-
Binary file SMART/Java/Python/misc/Progress.pyc has changed
--- a/SMART/Java/Python/misc/RPlotter.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/misc/RPlotter.py	Fri May 20 09:25:40 2016 -0400
@@ -40,814 +40,782 @@
 """
 
 class RPlotter(object):
-	"""
-	Plot some curves
-	@ivar nbColors: number of different colors
-	@type nbColors: int
-	@ivar fileName: name of the file
-	@type fileName: string
-	@ivar lines: lines to be plotted
-	@type lines: array of dict
-	@ivar names: name of the lines
-	@type names: array of strings
-	@ivar colors: color of the lines
-	@type colors: array of strings
-	@ivar pointShapes: shape of the points
-	@type pointShapes: array of int
-	@ivar types: type of the lines (plain or dashed)
-	@type types: array of strings
-	@ivar legendNames: name of the lines printed in the legend
-	@type legendNames: array of strings
-	@ivar legendColors: color of the lines printed in the legend
-	@type legendColors: array of strings
-	@ivar legendTypes: type of the lines (plain or dashed) printed in the legend
-	@type legendTypes: array of strings
-	@ivar legendShapes: shape of points printed in the legend
-	@type legendShapes: array of int
-	@ivar format: format of the picture
-	@type format: string
-	@ivar lineWidth: width of the line in a xy-plot
-	@type lineWidth: int
-	@ivar xMin: minimum value taken on the x-axis
-	@type xMin: int
-	@ivar xMax: maximum value taken on the x-axis
-	@type xMax: int
-	@ivar yMin: minimum value taken on the y-axis
-	@type yMin: int
-	@ivar yMax: maximum value taken on the y-axis
-	@type yMax: int
-	@ivar minimumX: minimum value allowed on the x-axis
-	@type minimumX: int
-	@ivar maximumX: maximum value allowed on the x-axis
-	@type maximumX: int
-	@ivar minimumY: minimum value allowed on the y-axis
-	@type minimumY: int
-	@ivar maximumY: maximum value allowed on the y-axis
-	@type maximumY: int
-	@ivar leftMargin:  add some margin in the left part of the plot
-	@type leftMargin:  float
-	@ivar rightMargin: add some margin in the right part of the plot
-	@type rightMargin: float
-	@ivar downMargin:  add some margin at the top of the plot
-	@type downMargin:  float
-	@ivar upMargin:    add some margin at the bottom of the plot
-	@type upMargin:    float
-	@ivar logX: use log scale on the x-axis
-	@type logX: boolean
-	@ivar logY: use log scale on the y-axis
-	@type logY: boolean
-	@ivar logZ: use log scale on the z-axis (the color)
-	@type logZ: boolean
-	@ival fill: if a value is not given, fill it with given value
-	@type fill: int
-	@ival bucket: cluster the data into buckets of given size
-	@type bucket: int
-	@ival seed: a random number
-	@type seed: int
-	@ival regression: plot a linear regression
-	@type regression: boolean
-	@ival legend: set the legend
-	@type legend: boolean
-	@ival legendBySide: set the legend outside of the plot
-	@type legendBySde: boolean
-	@ival xLabel: label for the x-axis
-	@type xLabel: string
-	@ival yLabel: label for the y-axis
-	@type yLabel: string
-	@ival title: title of the plot
-	@type title: string
-	@ival barplot: use a barplot representation instead
-	@type barplot: boolean
-	@ival points: use a point cloud instead
-	@type points: boolean
-	@ival heatPoints: use a colored point cloud instead
-	@type heatPoints: boolean
-	@ival axesLabels: change the names of the axes
-	@type axesLabels: vector of 2 int to string dict
-	@ival rotateAxesLabels: rotate the axes labels
-	@type rotateAxesLabels: dict of 2 boolean
-	@ival verbosity: verbosity of the class
-	@type verbosity: int
-	@ival keep: keep temporary files
-	@type keep: boolean
-	@ival sizeLegend: size of the police for the legend
-	@type sizeLegend: float
-	"""
+    """
+    Plot some curves
+    @ivar nbColors: number of different colors
+    @type nbColors: int
+    @ivar fileName: name of the file
+    @type fileName: string
+    @ivar lines: lines to be plotted
+    @type lines: array of dict
+    @ivar names: name of the lines
+    @type names: array of strings
+    @ivar colors: color of the lines
+    @type colors: array of strings
+    @ivar types: type of the lines (plain or dashed)
+    @type types: array of strings
+    @ivar format: format of the picture
+    @type format: string
+    @ivar lineWidth: width of the line in a xy-plot
+    @type lineWidth: int
+    @ivar xMin: minimum value taken on the x-axis
+    @type xMin: int
+    @ivar xMax: maximum value taken on the x-axis
+    @type xMax: int
+    @ivar yMin: minimum value taken on the y-axis
+    @type yMin: int
+    @ivar yMax: maximum value taken on the y-axis
+    @type yMax: int
+    @ivar minimumX: minimum value allowed on the x-axis
+    @type minimumX: int
+    @ivar maximumX: maximum value allowed on the x-axis
+    @type maximumX: int
+    @ivar minimumY: minimum value allowed on the y-axis
+    @type minimumY: int
+    @ivar maximumY: maximum value allowed on the y-axis
+    @type maximumY: int
+    @ivar leftMargin:  add some margin in the left part of the plot
+    @type leftMargin:  float
+    @ivar rightMargin: add some margin in the right part of the plot
+    @type rightMargin: float
+    @ivar topMargin:    add some margin at the top of the plot
+    @type topMargin:    float
+    @ivar bottomMargin: add some margin at the bottom of the plot
+    @type bottomMargin: float
+    @ivar logX: use log scale on the x-axis
+    @type logX: boolean
+    @ivar logY: use log scale on the y-axis
+    @type logY: boolean
+    @ivar logZ: use log scale on the z-axis (the color)
+    @type logZ: boolean
+    @ivar fill: if a value is not given, fill it with given value
+    @type fill: int
+    @ivar bucket: cluster the data into buckets of given size
+    @type bucket: int
+    @ivar seed: a random number
+    @type seed: int
+    @ivar regression: plot a linear regression
+    @type regression: boolean
+    @ivar legend: set the legend
+    @type legend: boolean
+    @ivar legendBySide: set the legend outside of the plot
+    @type legendBySide: boolean
+    @ivar xLabel: label for the x-axis
+    @type xLabel: string
+    @ivar yLabel: label for the y-axis
+    @type yLabel: string
+    @ivar title: title of the plot
+    @type title: string
+    @ivar barplot: use a barplot representation instead
+    @type barplot: boolean
+    @ivar points: use a point cloud instead
+    @type points: boolean
+    @ivar heatPoints: use a colored point cloud instead
+    @type heatPoints: boolean
+    @ivar axesLabels: change the names of the axes
+    @type axesLabels: vector of 2 int to string dict
+    @ivar rotateAxesLabels: rotate the axes labels
+    @type rotateAxesLabels: dict of 2 boolean
+    @ivar verbosity: verbosity of the class
+    @type verbosity: int
+    @ivar keep: keep temporary files
+    @type keep: boolean
+    """
 
-	def __init__(self, fileName, verbosity = 0, keep = False):
-		"""
-		Constructor
-		@param fileName: name of the file to produce
-		@type    fileName: string
-		@param verbosity: verbosity
-		@type    verbosity: int
-		@param keep: keep temporary files
-		@type keep: boolean
-		"""
-		self.nbColors = 9
-		self.nbUsedColors = 0
-		self.fileName = fileName
-		self.verbosity = verbosity
-		self.keep = keep
-		self.format = "png"
-		self.fill = None
-		self.bucket = None
-		self.lines = []
-		self.names = []
-		self.colors = []
-		self.pointShapes = []
-		self.types = []
-		self.legendNames = []
-		self.legendColors = []
-		self.legendTypes = []
-		self.legendShapes = []
-		self.lineWidth = 1
-		self.xMin = None
-		self.xMax = None
-		self.yMin = None
-		self.yMax = None
-		self.seed = random.randint(0, 10000)
-		self.minimumX = None
-		self.maximumX = None
-		self.minimumY = None
-		self.maximumY = None
-		self.leftMargin   = 0
-		self.rightMargin  = 0
-		self.topMargin    = 0
-		self.bottomMargin = 0
-		self.logX = False
-		self.logY = False
-		self.logZ = False
-		self.regression = False
-		self.width = 1000
-		self.height = 500
-		self.legend = False
-		self.legendBySide = False
-		self.xLabel = ""
-		self.yLabel = ""
-		self.title = None
-		self.points = False
-		self.heatPoints = False
-		self.barplot = False
-		self.axesLabels = {1: None, 2: None}
-		self.rotateAxesLabels = {1: False, 2: False}
-		self.linesToAddBox = ""
-	
-	def __del__(self):
-		"""
-		Destructor
-		Remove tmp files
-		"""
-		if not self.keep:
-			scriptFileName = "tmpScript-%d.R" % (self.seed)
-			if os.path.exists(scriptFileName):
-				os.remove(scriptFileName)
-			outputFileName = "%sout" % (scriptFileName)
-			if os.path.exists(outputFileName):
-				os.remove(outputFileName)
-			nbLines = len(self.lines) + (1 if self.heatPoints else 0)
-			for i in range(nbLines):
-				if os.path.exists("tmpData-%d-%d.dat" % (self.seed, i)):
-					os.remove("tmpData-%d-%d.dat" % (self.seed, i))
+    def __init__(self, fileName, verbosity = 0, keep = False):
+        """
+        Constructor
+        @param fileName: name of the file to produce
+        @type    fileName: string
+        @param verbosity: verbosity
+        @type    verbosity: int
+        @param keep: keep temporary files
+        @type keep: boolean
+        """
+        self.nbColors = 9
+        self.fileName = fileName
+        self.verbosity = verbosity
+        self.keep = keep
+        self.format = "png"
+        self.fill = None
+        self.bucket = None
+        self.lines = []
+        self.names = []
+        self.colors = []
+        self.types = []
+        self.lineWidth = 1
+        self.xMin = None
+        self.xMax = None
+        self.yMin = None
+        self.yMax = None
+        self.seed = random.randint(0, 10000)
+        self.minimumX = None
+        self.maximumX = None
+        self.minimumY = None
+        self.maximumY = None
+        self.leftMargin   = 0
+        self.rightMargin  = 0
+        self.topMargin    = 0
+        self.bottomMargin = 0
+        self.logX = False
+        self.logY = False
+        self.logZ = False
+        self.regression = False
+        self.width = 1000
+        self.height = 500
+        self.legend = False
+        self.legendBySide = False
+        self.xLabel = ""
+        self.yLabel = ""
+        self.title = None
+        self.points = False
+        self.heatPoints = False
+        self.barplot = False
+        self.axesLabels = {1: None, 2: None}
+        self.rotateAxesLabels = {1: False, 2: False}
+        self.linesToAddBox = ""
+    
+    def __del__(self):
+        """
+        Destructor
+        Remove tmp files
+        """
+        if not self.keep:
+            scriptFileName = "tmpScript-%d.R" % (self.seed)
+            if os.path.exists(scriptFileName):
+                os.remove(scriptFileName)
+            outputFileName = "%sout" % (scriptFileName)
+            if os.path.exists(outputFileName):
+                os.remove(outputFileName)
+            nbLines = len(self.lines) + (1 if self.heatPoints else 0)
+            for i in range(nbLines):
+                if os.path.exists("tmpData-%d-%d.dat" % (self.seed, i)):
+                    os.remove("tmpData-%d-%d.dat" % (self.seed, i))
 
-		
-	def setMinimumX(self, xMin):
-		"""
-		Set the minimum value on the x-axis
-		@param xMin:minimum value on the x-axis
-		@type xMin: int
-		"""
-		self.minimumX = xMin
+        
+    def setMinimumX(self, xMin):
+        """
+        Set the minimum value on the x-axis
+        @param xMin:minimum value on the x-axis
+        @type xMin: int
+        """
+        self.minimumX = xMin
 
-		
-	def setMaximumX(self, xMax):
-		"""
-		Set the maximum value on the x-axis
-		@param xMax: maximum value on the x-axis
-		@type xMax: int
-		"""
-		self.maximumX = xMax
-		
-	
-	def setMinimumY(self, yMin):
-		"""
-		Set the minimum value on the y-axis
-		@param yMin: minimum value on the y-axis
-		@type yMin: int
-		"""
-		self.minimumY = yMin
+        
+    def setMaximumX(self, xMax):
+        """
+        Set the maximum value on the x-axis
+        @param xMax: maximum value on the x-axis
+        @type xMax: int
+        """
+        self.maximumX = xMax
+        
+    
+    def setMinimumY(self, yMin):
+        """
+        Set the minimum value on the y-axis
+        @param yMin: minimum value on the y-axis
+        @type yMin: int
+        """
+        self.minimumY = yMin
 
-		
-	def setMaximumY(self, yMax):
-		"""
-		Set the maximum value on the y-axis
-		@param yMax: maximum value on the y-axis
-		@type xmax: int
-		"""
-		self.maximumY = yMax
-		
-	
-	def setFill(self, fill):
-		"""
-		Fill empty data with given value
-		@param fill: the value to fill with
-		@type fill: int
-		"""
-		self.fill = fill
+        
+    def setMaximumY(self, yMax):
+        """
+        Set the maximum value on the y-axis
+        @param yMax: maximum value on the y-axis
+        @type xmax: int
+        """
+        self.maximumY = yMax
+        
+    
+    def setFill(self, fill):
+        """
+        Fill empty data with given value
+        @param fill: the value to fill with
+        @type fill: int
+        """
+        self.fill = fill
 
 
-	def setBuckets(self, bucket):
-		"""
-		Cluster the data into buckets of given size
-		@param bucket: the size of the buckets
-		@type bucket: int
-		"""
-		self.bucket = bucket
+    def setBuckets(self, bucket):
+        """
+        Cluster the data into buckets of given size
+        @param bucket: the size of the buckets
+        @type bucket: int
+        """
+        self.bucket = bucket
 
 
-	def setRegression(self, regression):
-		"""
-		Plot a linear regression line
-		@param regression: whether to plot the regression
-		@type  regression: bool
-		"""
-		self.regression = regression
+    def setRegression(self, regression):
+        """
+        Plot a linear regression line
+        @param regression: whether to plot the regression
+        @type  regression: bool
+        """
+        self.regression = regression
 
 
-	def setFormat(self, format):
-		"""
-		Set the format of the picture
-		@param format: the format
-		@type format: string
-		"""
-		if format not in ("png", "pdf", "jpeg", "bmp", "tiff"):
-			raise Exception("Format '%s' is not supported by RPlotter" % (format))
-		self.format = format
+    def setFormat(self, format):
+        """
+        Set the format of the picture
+        @param format: the format
+        @type format: string
+        """
+        if format not in ("png", "pdf", "jpeg", "bmp", "tiff"):
+            raise Exception("Format '%s' is not supported by RPlotter" % (format))
+        self.format = format
 
 
-	def setWidth(self, width):
-		"""
-		Set the dimensions of the image produced
-		@param width: width of the image
-		@type width: int
-		"""
-		self.width = width
-		
-		
-	def setHeight(self, height):
-		"""
-		Set the dimensions of the image produced
-		@param height: heigth of the image
-		@type height: int
-		"""
-		self.height = height
-		
-		
-	def setImageSize(self, width, height):
-		"""
-		Set the dimensions of the image produced
-		@param width: width of the image
-		@type width: int
-		@param height: heigth of the image
-		@type height: int
-		"""
-		self.setWidth(width)
-		self.setHeight(height)
-		
-		
-	def setLegend(self, legend, bySide = False):
-		"""
-		Print a legend or not
-		@param legend: print a legend
-		@type  legend: boolean
-		@param bySide: put the legend outside of the plot
-		@type  bySide: boolean
-		"""
-		self.legend       = legend
-		self.legendBySide = bySide
+    def setWidth(self, width):
+        """
+        Set the dimensions of the image produced
+        @param width: width of the image
+        @type width: int
+        """
+        self.width = width
+        
+        
+    def setHeight(self, height):
+        """
+        Set the dimensions of the image produced
+        @param height: heigth of the image
+        @type height: int
+        """
+        self.height = height
+        
+        
+    def setImageSize(self, width, height):
+        """
+        Set the dimensions of the image produced
+        @param width: width of the image
+        @type width: int
+        @param height: heigth of the image
+        @type height: int
+        """
+        self.setWidth(width)
+        self.setHeight(height)
+        
+        
+    def setLegend(self, legend, bySide = False):
+        """
+        Print a legend or not
+        @param legend: print a legend
+        @type  legend: boolean
+        @param bySide: put the legend outside of the plot
+        @type  bySide: boolean
+        """
+        self.legend       = legend
+        self.legendBySide = bySide
 
 
-	def setXLabel(self, label):
-		"""
-		Print a label for the x-axis
-		@param label: the label
-		@type label: string
-		"""
-		self.xLabel = label
-		if self.xLabel != None:
-			self.xLabel = self.xLabel.replace("_", " ")
+    def setXLabel(self, label):
+        """
+        Print a label for the x-axis
+        @param label: the label
+        @type label: string
+        """
+        self.xLabel = label
+        if self.xLabel != None:
+            self.xLabel = self.xLabel.replace("_", " ")
 
 
-	def setYLabel(self, label):
-		"""
-		Print a label for the y-axis
-		@param label: the label
-		@type label: string
-		"""
-		self.yLabel = label
-		if self.yLabel != None:
-			self.yLabel = self.yLabel.replace("_", " ")
+    def setYLabel(self, label):
+        """
+        Print a label for the y-axis
+        @param label: the label
+        @type label: string
+        """
+        self.yLabel = label
+        if self.yLabel != None:
+            self.yLabel = self.yLabel.replace("_", " ")
 
 
-	def addLeftMargin(self, margin):
-		"""
-		Increase the size of the space on the left part of the graph
-		@param margin: the space added
-		@type  margin: float
-		"""
-		self.leftMargin = margin
+    def addLeftMargin(self, margin):
+        """
+        Increase the size of the space on the left part of the graph
+        @param margin: the space added
+        @type  margin: float
+        """
+        self.leftMargin = margin
 
 
-	def addRightMargin(self, margin):
-		"""
-		Increase the size of the space on the right part of the graph
-		@param margin: the space added
-		@type  margin: float
-		"""
-		self.rightMargin = margin
+    def addRightMargin(self, margin):
+        """
+        Increase the size of the space on the right part of the graph
+        @param margin: the space added
+        @type  margin: float
+        """
+        self.rightMargin = margin
 
 
-	def addTopMargin(self, margin):
-		"""
-		Increase the size of the space at the top of the graph
-		TopMargin is a percentage if 0 < TopMargin < 1.
-		TopMargin is a value if TopMargin >= 1.
-		@param margin: the space added
-		@type  margin: float
-		"""
-		self.topMargin = margin
+    def addTopMargin(self, margin):
+        """
+        Increase the size of the space at the top of the graph
+        TopMargin is a percentage if 0 < TopMargin < 1.
+        TopMargin is a value if TopMargin >= 1.
+        @param margin: the space added
+        @type  margin: float
+        """
+        self.topMargin = margin
 
 
-	def addBottomMargin(self, margin):
-		"""
-		Increase the size of the space at the bottom of the graph
-		@param margin: the space added
-		@type  margin: float
-		"""
-		self.bottomMargin = margin
+    def addBottomMargin(self, margin):
+        """
+        Increase the size of the space at the bottom of the graph
+        @param margin: the space added
+        @type  margin: float
+        """
+        self.bottomMargin = margin
 
 
-	def getNewYMaxWithTopMargin(self):
-		"""
-		Return new xMin coordinate with left margin
-		@param xMin: coordinate
-		@type  xMin: float
-		"""
-		yMax = self.yMax
-		if 0 < self.topMargin and self.topMargin < 1:
-			topMargin = self.topMargin * self.yMax
-			yMax = self.yMax + topMargin
-		elif self.topMargin >= 1:
-			yMax = self.yMax + self.topMargin
-		return yMax
+    def getNewYMaxWithTopMargin(self):
+        """
+        Return new xMin coordinate with left margin
+        @param xMin: coordinate
+        @type  xMin: float
+        """
+        yMax = self.yMax
+        if 0 < self.topMargin and self.topMargin < 1:
+            topMargin = self.topMargin * self.yMax
+            yMax = self.yMax + topMargin
+        elif self.topMargin >= 1:
+            yMax = self.yMax + self.topMargin
+        return yMax
 
 
-	def setTitle(self, title):
-		"""
-		Print a title for graph
-		@param title: a title
-		@type title: string
-		"""
-		self.title = title
-		if self.title != None:
-			self.title = self.title.replace("_", " ")
+    def setTitle(self, title):
+        """
+        Print a title for graph
+        @param title: a title
+        @type title: string
+        """
+        self.title = title
+        if self.title != None:
+            self.title = self.title.replace("_", " ")
 
 
-	def setAxisLabel(self, i, labels):
-		"""
-		Change x- or y-labels
-		@param i: x for x-label, y for y-label
-		@type  i: string
-		@param labels: new labels
-		@type  labels: int to string dict
-		"""
-		i = i.lower()
-		if i not in ("x", "y"):
-			raise Exception("Label name '" + i + "' should by 'x' or 'y' while changing axis labels.")
-		self.axesLabels[{"x": 1, "y": 2}[i]] = labels
+    def setAxisLabel(self, i, labels):
+        """
+        Change x- or y-labels
+        @param i: x for x-label, y for y-label
+        @type  i: string
+        @param labels: new labels
+        @type  labels: int to string dict
+        """
+        i = i.lower()
+        if i not in ("x", "y"):
+            raise Exception("Label name '" + i + "' should by 'x' or 'y' while changing axis labels.")
+        self.axesLabels[{"x": 1, "y": 2}[i]] = labels
 
 
-	def rotateAxisLabel(self, i, b = True):
-		"""
-		Rotate x- or y-labels
-		@param i: x for x-label, y for y-label
-		@type  i: string
-		@param b: whether the labels should be rotated
-		@type  b: boolean
-		"""
-		i = i.lower()
-		if i not in ("x", "y"):
-			raise Exception("Label name '" + i + "' should by 'x' or 'y' while rotating axis labels.")
-		self.rotateAxesLabels[{"x": 1, "y": 2}[i]] = b
+    def rotateAxisLabel(self, i, b = True):
+        """
+        Rotate x- or y-labels
+        @param i: x for x-label, y for y-label
+        @type  i: string
+        @param b: whether the labels should be rotated
+        @type  b: boolean
+        """
+        i = i.lower()
+        if i not in ("x", "y"):
+            raise Exception("Label name '" + i + "' should by 'x' or 'y' while rotating axis labels.")
+        self.rotateAxesLabels[{"x": 1, "y": 2}[i]] = b
 
-	def setLineWidth(self, width):
-		"""
-		Set the line width in a xy-plot
-		@param width: the new line width
-		@type  width: int
-		"""
-		self.lineWidth = width
-
-	def setLog(self, log):
-		"""
-		Use log-scale for axes
-		@param log: use log scale
-		@type log: boolean
-		"""
-		self.logX = ("x" in log)
-		self.logY = ("y" in log)
-		self.logZ = ("z" in log)
-		
+    def setLineWidth(self, width):
+        """
+        Set the line width in a xy-plot
+        @param width: the new line width
+        @type  width: int
+        """
+        self.lineWidth = width
 
-	def setBarplot(self, barplot):
-		"""
-		Use barplot representation instead
-		@param barplot: barplot representation
-		@type barplot: boolean
-		"""
-		self.barplot = barplot
-		
-
-	def setPoints(self, points):
-		"""
-		Use points cloud representation instead
-		@param points: points cloud representation
-		@type points: boolean
-		"""
-		self.points = points
-		
+    def setLog(self, log):
+        """
+        Use log-scale for axes
+        @param log: use log scale
+        @type log: boolean
+        """
+        self.logX = ("x" in log)
+        self.logY = ("y" in log)
+        self.logZ = ("z" in log)
+        
 
-	def setHeatPoints(self, heatPoints):
-		"""
-		Use points cloud representation with color representing another variable instead
-		@param points: colored points cloud representation
-		@type points: boolean
-		"""
-		self.heatPoints = heatPoints
+    def setBarplot(self, barplot):
+        """
+        Use barplot representation instead
+        @param barplot: barplot representation
+        @type barplot: boolean
+        """
+        self.barplot = barplot
+        
 
-	def setSizePoliceLegend(self, sizeLegend):
-		"""
-		Modify the size of the police for the legend only
-		@param size: a size
-		@type size: float
-		"""
-		self.sizeLegend = sizeLegend
+    def setPoints(self, points):
+        """
+        Use points cloud representation instead
+        @param points: points cloud representation
+        @type points: boolean
+        """
+        self.points = points
+        
 
-	def addBox(self, lXCoordList, minY, maxY):
-		for lXCoord in lXCoordList:
-			self.linesToAddBox += "rect(%s,%s,%s,%s,density=50, col='grey',border='transparent')\n" % (lXCoord[0], minY, lXCoord[1], maxY)
+    def setHeatPoints(self, heatPoints):
+        """
+        Use points cloud representation with color representing another variable instead
+        @param points: colored points cloud representation
+        @type points: boolean
+        """
+        self.heatPoints = heatPoints
+
 
-	def addColor(self, name, color):
-		if name == "" or name not in self.names:
-			if color == None:
-				colorNumber = self.nbUsedColors % (self.nbColors - 1) + 1
-				type        = "solid" if len(self.colors) < self.nbColors else "dashed"
-				color       = "colorPanel[%d]" % (colorNumber)
-				self.nbUsedColors += 1
-			else:
-				color = "\"%s\"" % (color)
-				type  = "solid"
-			shape = 0 if not self.pointShapes else self.pointShapes[-1]+1
-			self.legendNames.append(name)
-			self.legendColors.append(color)
-			self.legendTypes.append(type)
-			self.legendShapes.append(shape)
-		else:
-			index = self.names.index(name)
-			color = self.colors[index]
-			type  = self.types[index]
-			shape = self.pointShapes[index]
-		self.colors.append(color)
-		self.types.append(type)
-		self.pointShapes.append(shape)
-	
-	def addLine(self, line, name = "", color = None):
-		"""
-		Add a line 
-		@param line: a line to plot
-		@type line: dict
-		"""
-		# prepare data
-		plot = []
-		if type(line[line.keys()[0]]) is dict:
-			values = line.values()
-		elif self.fill == None:
-			values = sorted(line.keys())
-		else:
-			values = range(min(line.keys()), max(line.keys()) + 1)
-			
-		for element in values:
-			if element is list:
-				x = element[0]
-				y = element[1]
-			else:
-				x = element
-				if x not in line:
-					y = self.fill
-				else:
-					y = line[x]
-				
-			if self.minimumX != None and x < self.minimumX:
-				continue
-			if self.maximumX != None and x > self.maximumX:
-				continue
-			
-			if x == None:
-				raise Exception("Problem! x is None. Aborting...")
-			if y == None:
-				raise Exception("Problem! y is None. Aborting...")
-			if x == 0 and self.logX:
-				x = minPositiveValue
-			if y == 0 and self.logY:
-				y = minPositiveValue
-			if self.xMin == None:
-				if not self.logX or x != 0:
-					self.xMin = x
-			else:
-				if not self.logX or x != 0:
-					self.xMin = min(self.xMin, x)
-			if self.xMax == None:
-				self.xMax = x
-			else:
-				self.xMax = max(self.xMax, x)
-			if self.yMin == None:
-				if not self.logY or y != 0:
-					self.yMin = y
-			else:
-				if not self.logY or y != 0:
-					if y != "NA":
-						self.yMin = min(self.yMin, y)
-			if self.yMax == None:
-				self.yMax = y
-			else:
-				if y != "NA":
-					self.yMax = max(self.yMax, y)
+    def addBox(self, lXCoordList, minY, maxY):
+        for lXCoord in lXCoordList:
+            self.linesToAddBox += "rect(%s,%s,%s,%s,density=50, col='grey',border='transparent')\n" % (lXCoord[0], minY, lXCoord[1], maxY)
+    
+    def addLine(self, line, name = "", color = None):
+        """
+        Add a line 
+        @param line: a line to plot
+        @type line: dict
+        """
+        # prepare data
+        plot = []
+        if self.points or self.heatPoints:
+            values = line.values()
+        elif self.fill == None:
+            values = sorted(line.keys())
+        else:
+            values = range(min(line.keys()), max(line.keys()) + 1)
+            
+        for element in values:
+            if self.points or self.heatPoints:
+                x = element[0]
+                y = element[1]
+            else:
+                x = element
+                if x not in line:
+                    y = self.fill
+                else:
+                    y = line[x]
+                
+            if self.minimumX != None and x < self.minimumX:
+                continue
+            if self.maximumX != None and x > self.maximumX:
+                continue
+            
+            if x == None:
+                raise Exception("Problem! x is None. Aborting...")
+            if y == None:
+                raise Exception("Problem! y is None. Aborting...")
+            if x == 0 and self.logX:
+                x = minPositiveValue
+            if y == 0 and self.logY:
+                y = minPositiveValue
+            if self.xMin == None:
+                if not self.logX or x != 0:
+                    self.xMin = x
+            else:
+                if not self.logX or x != 0:
+                    self.xMin = min(self.xMin, x)
+            if self.xMax == None:
+                self.xMax = x
+            else:
+                self.xMax = max(self.xMax, x)
+            if self.yMin == None:
+                if not self.logY or y != 0:
+                    self.yMin = y
+            else:
+                if not self.logY or y != 0:
+                    if y != "NA":
+                        self.yMin = min(self.yMin, y)
+            if self.yMax == None:
+                self.yMax = y
+            else:
+                if y != "NA":
+                    self.yMax = max(self.yMax, y)
 
-			plot.append((x, y))
-
-		# cluster the data into buckets
-		if self.bucket != None:
-			buckets = dict([((int(value) / int(self.bucket)) * self.bucket, 0) for value in xrange(min(line.keys()), max(line.keys())+1)])
-			for distance, nb in line.iteritems():
-				buckets[(int(distance) / int(self.bucket)) * self.bucket] += nb
-			self.yMax = max(buckets.values())
-			plot = []
-			for x, y in buckets.iteritems():
-				plot.append((x, y))
+            plot.append((x, y))
 
-		# write file
-		dataFileName = "tmpData-%d-%d.dat" % (self.seed, len(self.lines))
-		dataHandle = open(dataFileName, "w")
-		if not self.heatPoints:
-			plot.sort()
-		for (x, y) in plot:
-			if y != "NA":
-				dataHandle.write("%f\t%f\n" % (x, y))
-			else:
-				dataHandle.write("%f\t%s\n" % (x, y))
-		dataHandle.close()
-
-		self.addColor(name, color)
-		self.lines.append(line)
-		self.names.append(name)
+        # cluster the data into buckets
+        if self.bucket != None:
+            buckets = dict([((int(value) / int(self.bucket)) * self.bucket, 0) for value in xrange(min(line.keys()), max(line.keys())+1)])
+            for distance, nb in line.iteritems():
+                buckets[(int(distance) / int(self.bucket)) * self.bucket] += nb
+            self.yMax = max(buckets.values())
+            plot = []
+            for x, y in buckets.iteritems():
+                plot.append((x, y))
 
-	def addHeatLine(self, line, name = "", color = None):
-		"""
-		Add the heat line 
-		@param line: the line which gives the color of the points
-		@type    line: dict
-		"""
-		if not self.heatPoints:
-			raise Exception("Error! Trying to add a heat point whereas not mentioned to earlier! Aborting.")
-			
-		dataFileName = "tmpData-%d-%d.dat" % (self.seed, len(self.lines))
-		dataHandle = open(dataFileName, "w")
-	
-		minimumHeat = min(line.values())
-		maximumHeat = max(line.values())
-		minLogValue = 0.00001
-		log = self.logZ
-		
-		if log:
-			if minimumHeat == 0:
-				for element in line:
-					line[element] += minLogValue
-				minimumHeat += minLogValue
-				maximumHeat += minLogValue
-			minimumHeat = math.log10(minimumHeat)
-			maximumHeat = math.log10(maximumHeat)
-		
-		coeff = 255.0 / (maximumHeat - minimumHeat)
+        # write file
+        dataFileName = "tmpData-%d-%d.dat" % (self.seed, len(self.lines))
+        dataHandle = open(dataFileName, "w")
+        if not self.heatPoints:
+            plot.sort()
+        for (x, y) in plot:
+            if y != "NA":
+                dataHandle.write("%f\t%f\n" % (x, y))
+            else:
+                dataHandle.write("%f\t%s\n" % (x, y))
+        dataHandle.close()
 
-		for element in line:
-			value = line[element]
-			if log:
-				value = math.log10(max(minLogValue, value))
-			dataHandle.write("\"#%02X%02X00\"\n" % (int((value - minimumHeat) * coeff), 255 - int((value - minimumHeat) * coeff)))
+        self.lines.append(line)
+        self.names.append(name)
 
-		dataHandle.close()
-		self.addColor(name, color)
-		self.names.append(name)
+        if color == None:
+            colorNumber = len(self.colors) % (self.nbColors - 1) + 1
+            type = "solid"
+            if len(self.colors) >= self.nbColors:
+                type = "dashed"
+            color = "colorPanel[%d]" % (colorNumber)
+        else:
+            color = "\"%s\"" % (color)
+            type = "solid"
+        self.colors.append(color)
+        self.types.append(type)
 
 
-	def getScript(self):
-		"""
-		Write (unfinished) R script
-		"""
-		script = ""
-
-		xMin = self.xMin - self.leftMargin
-		if self.minimumX != None:
-			xMin = max(xMin, self.minimumX)
-		xMax = self.xMax + self.rightMargin
-		if self.maximumX != None:
-			xMax = min(xMax, self.maximumX)
-		yMin = self.yMin - self.bottomMargin
-		if self.minimumY != None:
-			yMin = self.minimumY
-		yMax = self.getNewYMaxWithTopMargin()
-		yMax += min(1, yMax / 100.0)
-		if self.maximumY != None:
-			yMax = self.maximumY
-
-		log = ""
-		if self.logX:
-			log += "x"
-		if self.logY:
-			log += "y"
-		if log != "":
-			log = ", log=\"%s\"" % (log)
-
-		title = ""
-		if self.title != None:
-			title = ", main = \"%s\"" % (self.title)
-
-		if self.legend and self.legendBySide:
-			script += "layout(matrix(c(1,2), 1, 2), widths=c(5,1))\n"
-
-		if self.rotateAxesLabels[2]:
-			script += "par(mar=c(5,12,4,2))\n"
-		else:
-			script += "par(mar=c(5,5,4,2))\n"
-
-		addAxes = True
+    def addHeatLine(self, line, name = "", color = None):
+        """
+        Add the heat line 
+        @param line: the line which gives the color of the points
+        @type    line: dict
+        """
+        if not self.heatPoints:
+            raise Exception("Error! Trying to add a heat point whereas not mentioned to earlier! Aborting.")
+            
+        dataFileName = "tmpData-%d-%d.dat" % (self.seed, len(self.lines))
+        dataHandle = open(dataFileName, "w")
+    
+        minimumHeat = min(line.values())
+        maximumHeat = max(line.values())
+        minLogValue = 0.00001
+        log = self.logZ
+        
+        if log:
+            if minimumHeat == 0:
+                for element in line:
+                    line[element] += minLogValue
+                minimumHeat += minLogValue
+                maximumHeat += minLogValue
+            minimumHeat = math.log10(minimumHeat)
+            maximumHeat = math.log10(maximumHeat)
+        
+        coeff = 255.0 / (maximumHeat - minimumHeat)
 
-		if self.barplot:
-			script += "data = scan(\"tmpData-%d-0.dat\", list(x = -666, y = -666))\n" % (self.seed)
-			if len(self.lines) == 1:
-				script += "barplot(data$y, name = data$x, xlab=\"%s\", ylab=\"%s\", ylim = c(%f, %f), cex.axis = 2, cex.names = 2, cex.lab = 2%s%s)\n" % (self.xLabel, self.yLabel, yMin, yMax, title, log)
-				addAxes = False
-			else:
-				script += "data1 = scan(\"tmpData-%d-1.dat\", list(x = -666, y = -666))\n" % (self.seed)
-				script += "barplot(rbind(data$y, data1$y), name = data$x, xlab=\"%s\", ylab=\"%s\", cex.axis = 2, cex.names = 2, cex.lab = 2%s, beside = TRUE, space=c(-1,0), axes = FALSE%s)\n" % (self.xLabel, self.yLabel, title, log)
-		elif self.points:
-			script += "plot(x = NA, y = NA, panel.first = grid(lwd = 1.0), xlab=\"%s\", ylab=\"%s\", xlim = c(%f, %f), ylim = c(%f, %f), cex.axis = 2, cex.lab = 2, axes = FALSE%s%s)\n" % (self.xLabel, self.yLabel, xMin, xMax, yMin, yMax, title, log)
-			for i in range(0, len(self.lines)):
-				script += "data = scan(\"tmpData-%d-%d.dat\", list(x = -666.666, y = -666.666))\n" % (self.seed, i)
-				script += "points(x = data$x, y = data$y, col = %s, pch=%d, lwd = %d)\n" % (self.colors[i], self.pointShapes[i], self.lineWidth)
-			if self.regression:
-				if len(self.lines) != 1:
-					raise Exception("Error! Cannot compute regression with multiple data sets...")
-				x = "log10(data$x)" if self.logX else "data$x"
-				y = "log10(data$y)" if self.logY else "data$y"
-				script += "abline(lm(%s ~ %s))\n" % (y, x)
-		elif self.heatPoints:
-			if len(self.lines) != 1:
-				raise Exception("Error! Bad number of input data! Aborting...")
-			script += "data = scan(\"tmpData-%d-0.dat\", list(x = -666, y = -666))\n" % (self.seed)
-			script += "heatData = scan(\"tmpData-%d-1.dat\", list(x = \"\"))\n" % (self.seed)
-			script += "plot(data$x, data$y, col=heatData$x, xlab=\"%s\", ylab=\"%s\", cex.axis = 2, cex.lab = 2, axes = FALSE%s%s)\n" % (self.xLabel, self.yLabel, title, log)
-			if self.regression:
-				x = "log10(data$x)" if self.logX else "data$x"
-				y = "log10(data$y)" if self.logY else "data$y"
-				script += "abline(lm(%s ~ %s))\n" % (y, x)
-		else:
-			script += "plot(x = NA, y = NA, panel.first = grid(lwd = 1.0), xlab=\"%s\", ylab=\"%s\", xlim = c(%f, %f), ylim = c(%f, %f), cex.axis = 2, cex.lab = 2, axes = FALSE%s%s)\n" % (self.xLabel, self.yLabel, xMin, xMax, yMin, yMax, title, log)
-			for i in range(0, len(self.lines)):
-				script += "data = scan(\"tmpData-%d-%d.dat\", list(x = -666.666, y = -666.666))\n" % (self.seed, i)
-				script += "lines(x = data$x, y = data$y, col = %s, lty = \"%s\", lwd = %d)\n" % (self.colors[i], self.types[i], self.lineWidth)
-				
-			script += self.linesToAddBox
-				
-		if addAxes:
-			for i in self.axesLabels:
-				rotation = ", las = 2" if self.rotateAxesLabels[i] else ""
-				if self.axesLabels[i] == None:
-					script += "axis(%d, cex.axis = 2, cex.lab = 2%s)\n" % (i, rotation)
-				else:
-					oldKeys = ", ".join(["%d" % (key) for key in sorted(self.axesLabels[i].keys())])
-					newKeys = ", ".join(["\"%s\"" % (self.axesLabels[i][key]) for key in sorted(self.axesLabels[i].keys())])
-					script += "axis(%d, at=c(%s), lab=c(%s), cex.axis = 2, cex.lab = 2%s)\n" % (i, oldKeys, newKeys, rotation)
-		script += "box()\n"
+        for element in line:
+            value = line[element]
+            if log:
+                value = math.log10(max(minLogValue, value))
+            dataHandle.write("\"#%02X%02X00\"\n" % (int((value - minimumHeat) * coeff), 255 - int((value - minimumHeat) * coeff)))
 
-		if self.legend:
-			if self.legendBySide:
-				script += "plot.new()\n"
-				script += "par(mar=c(0,0,0,0))\n"
-				script += "plot.window(c(0,1), c(0,1))\n"
-			script += "legends   = c(%s)\n" % ", ".join(["\"%s\"" % name  for name  in self.legendNames])
-			script += "colors    = c(%s)\n" % ", ".join(["%s" %     color for color in self.legendColors])
-			script += "lineTypes = c(%s)\n" % ", ".join((["NA"] * len(self.legendTypes)) if self.points else (["\"%s\"" % type  for type  in self.legendTypes]))
-			if self.points:
-				script += "pointShapes = c(%s)\n" % ", ".join(["%d" % shape for shape in self.legendShapes])
-			shape = ", pch = pointShapes" if self.points else ""
-			pos   = "0, 1" if self.legendBySide else "\"topright\""
-			script += "legend(%s, legend = legends, xjust = 0, yjust = 1, col = colors, lty = lineTypes, lwd = %d, cex = %f, ncol = 1, bg = \"white\"%s)\n" % (pos, self.lineWidth, self.sizeLegend, shape)
-
-		return script
-			
+        dataHandle.close()
+        self.names.append(name)
+        if color == None:
+            colorNumber = len(self.colors) % (self.nbColors - 1) + 1
+            type = "solid"
+            if len(self.colors) >= self.nbColors:
+                type = "dashed"
+            color = "colorPanel[%d]" % (colorNumber)
+        else:
+            color = "\"%s\"" % (color)
+            type = "solid"
+        self.colors.append(color)
+        self.types.append(type)
 
 
-	def plot(self):
-		"""
-		Plot the lines
-		"""
-		scriptFileName = "tmpScript-%d.R" % (self.seed)
-		scriptHandle = open(scriptFileName, "w")
-		scriptHandle.write("library(RColorBrewer)\n")
-		scriptHandle.write("colorPanel = brewer.pal(n=%d, name=\"Set1\")\n" % (self.nbColors))
-		scriptHandle.write("%s(%s = \"%s\", width = %d, height = %d, bg = \"white\")\n" % (self.format, "filename" if self.format != "pdf" else "file", self.fileName, self.width, self.height))
-		scriptHandle.write(self.getScript())
-		scriptHandle.write("dev.off()\n")
-		scriptHandle.close()
-		rCommand = "R"
-		if "SMARTRPATH" in os.environ:
-			rCommand = os.environ["SMARTRPATH"]
-		command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
-		status = subprocess.call(command, shell=True)
+    def getScript(self):
+        """
+        Write (unfinished) R script
+        """
+        script = ""
+
+        xMin = self.xMin - self.leftMargin
+        if self.minimumX != None:
+            xMin = max(xMin, self.minimumX)
+        xMax = self.xMax + self.rightMargin
+        if self.maximumX != None:
+            xMax = min(xMax, self.maximumX)
+        yMin = self.yMin - self.bottomMargin
+        if self.minimumY != None:
+            yMin = self.minimumY
+        yMax = self.getNewYMaxWithTopMargin()
+        yMax += min(1, yMax / 100.0)
+        if self.maximumY != None:
+            yMax = self.maximumY
 
-		if status != 0:
-			self.keep = True
-			raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))
-			
+        log = ""
+        if self.logX:
+            log += "x"
+        if self.logY:
+            log += "y"
+        if log != "":
+            log = ", log=\"%s\"" % (log)
+
+        title = ""
+        if self.title != None:
+            title = ", main = \"%s\"" % (self.title)
+
+        if self.legend and self.legendBySide:
+            script += "layout(matrix(c(1,2), 1, 2), widths=c(5,1))\n"
+
+        if self.rotateAxesLabels[2]:
+            script += "par(mar=c(5,12,4,2))\n"
+        else:
+            script += "par(mar=c(5,5,4,2))\n"
+
+        addAxes = True
 
-	def getCorrelationData(self):
-		if not self.regression:
-			return ""
-		scriptFileName = "tmpScript-%d.R" % (self.seed)
-		rScript = open(scriptFileName, "w")
-		rScript.write("data = scan(\"tmpData-%d-0.dat\", list(x = -0.000000, y = -0.000000))\n" % (self.seed))
-		x = "log10(data$x)" if self.logX else "data$x"
-		y = "log10(data$y)" if self.logY else "data$y"
-		rScript.write("summary(lm(%s ~ %s))\n" % (y, x))
-		rScript.close()
-		rCommand = "R"
-		if "SMARTRPATH" in os.environ:
-			rCommand = os.environ["SMARTRPATH"]
-		command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
-		status = subprocess.call(command, shell=True)
-		if status != 0:
-			self.keep = True
-			raise Exception("Problem with the execution of script file %s computing the correlation, status is: %s" % (scriptFileName, status))
-		outputRFile = open("%sout" % (scriptFileName))
-		output      = ""
-		start       = False
-		end         = False
-		for line in outputRFile:
-			if start and "> " in line:
-				end = True
-			if start and not end:
-				output += line
-			if "summary" in line:
-				start = True
-		return output
+        if self.barplot:
+            script += "data = scan(\"tmpData-%d-0.dat\", list(x = -666, y = -666))\n" % (self.seed)
+            if len(self.lines) == 1:
+                script += "barplot(data$y, name = data$x, xlab=\"%s\", ylab=\"%s\", ylim = c(%f, %f), cex.axis = 2, cex.names = 2, cex.lab = 2%s%s)\n" % (self.xLabel, self.yLabel, yMin, yMax, title, log)
+                addAxes = False
+            else:
+                script += "data1 = scan(\"tmpData-%d-1.dat\", list(x = -666, y = -666))\n" % (self.seed)
+                script += "barplot(rbind(data$y, data1$y), name = data$x, xlab=\"%s\", ylab=\"%s\", cex.axis = 2, cex.names = 2, cex.lab = 2%s, beside = TRUE, space=c(-1,0), axes = FALSE%s)\n" % (self.xLabel, self.yLabel, title, log)
+        elif self.points:
+            script += "data = scan(\"tmpData-%d-0.dat\", list(x = -666, y = -666))\n" % (self.seed)
+            script += "plot(data$x, data$y, xlab=\"%s\", ylab=\"%s\", cex.axis = 2, cex.lab = 2, axes = FALSE%s%s)\n" % (self.xLabel, self.yLabel, title, log)
+            if self.regression:
+                x = "log10(data$x)" if self.logX else "data$x"
+                y = "log10(data$y)" if self.logY else "data$y"
+                script += "abline(lm(%s ~ %s))\n" % (y, x)
+        elif self.heatPoints:
+            if len(self.lines) != 1:
+                raise Exception("Error! Bad number of input data! Aborting...")
+            script += "data = scan(\"tmpData-%d-0.dat\", list(x = -666, y = -666))\n" % (self.seed)
+            script += "heatData = scan(\"tmpData-%d-1.dat\", list(x = \"\"))\n" % (self.seed)
+            script += "plot(data$x, data$y, col=heatData$x, xlab=\"%s\", ylab=\"%s\", cex.axis = 2, cex.lab = 2, axes = FALSE%s%s)\n" % (self.xLabel, self.yLabel, title, log)
+            if self.regression:
+                x = "log10(data$x)" if self.logX else "data$x"
+                y = "log10(data$y)" if self.logY else "data$y"
+                script += "abline(lm(%s ~ %s))\n" % (y, x)
+        else:
+            script += "plot(x = NA, y = NA, panel.first = grid(lwd = 1.0), xlab=\"%s\", ylab=\"%s\", xlim = c(%f, %f), ylim = c(%f, %f), cex.axis = 2, cex.lab = 2, axes = FALSE%s%s)\n" % (self.xLabel, self.yLabel, xMin, xMax, yMin, yMax, title, log)
+            for i in range(0, len(self.lines)):
+                script += "data = scan(\"tmpData-%d-%d.dat\", list(x = -666.666, y = -666.666))\n" % (self.seed, i)
+                script += "lines(x = data$x, y = data$y, col = %s, lty = \"%s\", lwd = %d)\n" % (self.colors[i], self.types[i], self.lineWidth)
+                
+            script += self.linesToAddBox
+                
+        if addAxes:
+            for i in self.axesLabels:
+                rotation = ", las = 2" if self.rotateAxesLabels[i] else ""
+                if self.axesLabels[i] == None:
+                    script += "axis(%d, cex.axis = 2, cex.lab = 2%s)\n" % (i, rotation)
+                else:
+                    oldKeys = ", ".join(["%d" % (key) for key in sorted(self.axesLabels[i].keys())])
+                    newKeys = ", ".join(["\"%s\"" % (self.axesLabels[i][key]) for key in sorted(self.axesLabels[i].keys())])
+                    script += "axis(%d, at=c(%s), lab=c(%s), cex.axis = 2, cex.lab = 2%s)\n" % (i, oldKeys, newKeys, rotation)
+        script += "box()\n"
+
+        if self.legend:
+            if self.legendBySide:
+                script += "plot.new()\n"
+                script += "par(mar=c(0,0,0,0))\n"
+                script += "plot.window(c(0,1), c(0,1))\n"
+            script += "legends   = c(%s)\n" % ", ".join(["\"%s\"" % name  for name  in self.names])
+            script += "colors    = c(%s)\n" % ", ".join(["%s" %     color for color in self.colors])
+            script += "lineTypes = c(%s)\n" % ", ".join(["\"%s\"" % type  for type  in self.types])
+            if self.legendBySide:
+                script += "legend(0, 1, legend = legends, xjust = 0, yjust = 1, col = colors, lty = lineTypes, lwd = %d, cex = 1.5, ncol = 1, bg = \"white\")\n" % (self.lineWidth)
+            else:
+                script += "legend(\"topright\", legend = legends, xjust = 0, yjust = 1, col = colors, lty = lineTypes, lwd = %d, cex = 1.5, ncol = 1, bg = \"white\")\n" % (self.lineWidth)
+
+        return script
+            
 
 
-	def getSpearmanRho(self):
-		"""
-		Get the Spearman rho correlation using R
-		"""
-		return None
-		if not self.points and not self.barplot and not self.heatPoints:
-			raise Exception("Cannot compute Spearman rho correlation whereas not in 'points' or 'bar' mode.")
-		
-		scriptFileName = "tmpScript-%d.R" % (self.seed)
-		rScript = open(scriptFileName, "w")
-		rScript.write("library(Hmisc)\n")
-		rScript.write("data = scan(\"tmpData-%d-0.dat\", list(x = -0.000000, y = -0.000000))\n" % (self.seed))
-		rScript.write("spearman(data$x, data$y)\n")
-		rScript.close()
+    def plot(self):
+        """
+        Plot the lines
+        """
+        scriptFileName = "tmpScript-%d.R" % (self.seed)
+        scriptHandle = open(scriptFileName, "w")
+        scriptHandle.write("library(RColorBrewer)\n")
+        scriptHandle.write("colorPanel = brewer.pal(n=%d, name=\"Set1\")\n" % (self.nbColors))
+        scriptHandle.write("%s(%s = \"%s\", width = %d, height = %d, bg = \"white\")\n" % (self.format, "filename" if self.format != "pdf" else "file", self.fileName, self.width, self.height))
+        scriptHandle.write(self.getScript())
+        scriptHandle.write("dev.off()\n")
+        scriptHandle.close()
+        rCommand = "R"
+        if "SMARTRPATH" in os.environ:
+            rCommand = os.environ["SMARTRPATH"]
+        command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
+        status = subprocess.call(command, shell=True)
+
+        if status != 0:
+            self.keep = True
+            raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))
+            
 
-		rCommand = "R"
-		if "SMARTRPATH" in os.environ:
-			rCommand = os.environ["SMARTRPATH"]
-		command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
-		status = subprocess.call(command, shell=True)
+    def getCorrelationData(self):
+        if not self.regression:
+            return ""
+        scriptFileName = "tmpScript-%d.R" % (self.seed)
+        rScript = open(scriptFileName, "w")
+        rScript.write("data = scan(\"tmpData-%d-0.dat\", list(x = -0.000000, y = -0.000000))\n" % (self.seed))
+        x = "log10(data$x)" if self.logX else "data$x"
+        y = "log10(data$y)" if self.logY else "data$y"
+        rScript.write("summary(lm(%s ~ %s))\n" % (y, x))
+        rScript.close()
+        rCommand = "R"
+        if "SMARTRPATH" in os.environ:
+            rCommand = os.environ["SMARTRPATH"]
+        command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
+        status = subprocess.call(command, shell=True)
+        if status != 0:
+            self.keep = True
+            raise Exception("Problem with the execution of script file %s computing the correlation, status is: %s" % (scriptFileName, status))
+        outputRFile = open("%sout" % (scriptFileName))
+        output      = ""
+        start       = False
+        end         = False
+        for line in outputRFile:
+            if start and "> " in line:
+                end = True
+            if start and not end:
+                output += line
+            if "summary" in line:
+                start = True
+        return output
 
-		if status != 0:
-			self.keep = True
-			raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))
 
-		outputRFile = open("%sout" % (scriptFileName))
-		nextLine = False
-		for line in outputRFile:
-			line = line.strip()
-			if nextLine:
-				if line == "NA":
-					return None
-				return float(line)
-				nextLine = False
-			if line == "rho":
-				nextLine = True
+    def getSpearmanRho(self):
+        """
+        Get the Spearman rho correlation using R
+        """
+        return None
+        if not self.points and not self.barplot and not self.heatPoints:
+            raise Exception("Cannot compute Spearman rho correlation whereas not in 'points' or 'bar' mode.")
+        
+        scriptFileName = "tmpScript-%d.R" % (self.seed)
+        rScript = open(scriptFileName, "w")
+        rScript.write("library(Hmisc)\n")
+        rScript.write("data = scan(\"tmpData-%d-0.dat\", list(x = -0.000000, y = -0.000000))\n" % (self.seed))
+        rScript.write("spearman(data$x, data$y)\n")
+        rScript.close()
 
-		return None
+        rCommand = "R"
+        if "SMARTRPATH" in os.environ:
+            rCommand = os.environ["SMARTRPATH"]
+        command = "\"%s\" CMD BATCH %s" % (rCommand, scriptFileName)
+        status = subprocess.call(command, shell=True)
+
+        if status != 0:
+            self.keep = True
+            raise Exception("Problem with the execution of script file %s, status is: %s" % (scriptFileName, status))
+
+        outputRFile = open("%sout" % (scriptFileName))
+        nextLine = False
+        for line in outputRFile:
+            line = line.strip()
+            if nextLine:
+                if line == "NA":
+                    return None
+                return float(line)
+                nextLine = False
+            if line == "rho":
+                nextLine = True
+
+        return None
Binary file SMART/Java/Python/misc/RPlotter.pyc has changed
Binary file SMART/Java/Python/misc/UnlimitedProgress.pyc has changed
Binary file SMART/Java/Python/misc/Utils.pyc has changed
Binary file SMART/Java/Python/misc/__init__.pyc has changed
Binary file SMART/Java/Python/mySql/MySqlExonTable.pyc has changed
Binary file SMART/Java/Python/mySql/MySqlTable.pyc has changed
Binary file SMART/Java/Python/mySql/MySqlTranscriptTable.pyc has changed
Binary file SMART/Java/Python/mySql/__init__.pyc has changed
--- a/SMART/Java/Python/ncList/FileSorter.py	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/Java/Python/ncList/FileSorter.py	Fri May 20 09:25:40 2016 -0400
@@ -41,6 +41,7 @@
 from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
 
 BUFFER_SIZE = 100 * 1024
+NB_FILES    = 10000000000
 
 class FileSorter(object):
 
@@ -120,7 +121,18 @@
 			outputChunk.write(pickle.dumps(transcript, -1))
 		outputChunk.close()
 		
-	def _merge(self, chunks):
+	def _merge(self, chunks, chromosome, outputHandle):  # merge sorted chunk files into outputHandle, pre-merging in batches of NB_FILES
+		currentOutputChunkId = len(chunks)  # counter used to name intermediate chunk files
+		while len(chunks) > NB_FILES:  # too many chunks for one pass: fold NB_FILES of them into one temp chunk
+			outputChunk           = open("%s_%s_%06i.tmp" % (self._prefix, chromosome, currentOutputChunkId), "wb", 32000)  # fixed: was undefined name currentChunkId
+			currentOutputChunkId += 1
+			currentChunks         = chunks[:NB_FILES]
+			chunks                = chunks[NB_FILES:] + [outputChunk]  # fixed: list + [file]; _mergeParts reopens chunks through .name
+			self._mergeParts(currentChunks, outputChunk)
+			outputChunk.close()
+		self._mergeParts(chunks, outputHandle)  # final pass writes to the caller's handle
+
+	def _mergeParts(self, chunks, outputHandle):
 		values = []
 		for chunk in chunks:
 			chunk = open(chunk.name, "rb")
@@ -138,8 +150,8 @@
 			else:
 				heappush(values, (start, end, transcript, chunk))
 		while values:
-			start, end, transcript, chunk = heappop(values)
-			yield transcript
+			start, end, transcript, chunk = heappop(values)
+			pickle.dump(transcript, outputHandle, -1)
 			try:
 				transcript = pickle.load(chunk)
 				start	   = transcript.getStart()
@@ -189,8 +201,7 @@
 				if self._perChromosome:
 					self._outputFileNames[chromosome] = "%s_%s.pkl" % (self._outputFileName, chromosome)
 					outputHandle = open(self._outputFileNames[chromosome], "wb")
-				for sequence in self._merge(self._chunks[chromosome]):
-					pickle.dump(sequence, outputHandle, -1)
+				self._merge(self._chunks[chromosome], chromosome, outputHandle)
 				if self._perChromosome:
 					outputHandle.close()
 				progress.inc()
Binary file SMART/Java/Python/ncList/FileSorter.pyc has changed
Binary file SMART/Java/Python/ncList/NCIndex.pyc has changed
Binary file SMART/Java/Python/ncList/NCList.pyc has changed
Binary file SMART/Java/Python/ncList/NCListCursor.pyc has changed
Binary file SMART/Java/Python/ncList/NCListFilePickle.pyc has changed
Binary file SMART/Java/Python/ncList/__init__.pyc has changed
Binary file SMART/Java/Python/structure/Bins.pyc has changed
Binary file SMART/Java/Python/structure/Interval.pyc has changed
Binary file SMART/Java/Python/structure/Mapping.pyc has changed
Binary file SMART/Java/Python/structure/Sequence.pyc has changed
Binary file SMART/Java/Python/structure/SequenceList.pyc has changed
Binary file SMART/Java/Python/structure/SubMapping.pyc has changed
Binary file SMART/Java/Python/structure/Transcript.pyc has changed
Binary file SMART/Java/Python/structure/TranscriptContainer.pyc has changed
Binary file SMART/Java/Python/structure/TranscriptList.pyc has changed
Binary file SMART/Java/Python/structure/__init__.pyc has changed
Binary file SMART/Java/__init__.pyc has changed
Binary file SMART/__init__.pyc has changed
--- a/SMART/galaxy/CleanTranscriptFile.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/CleanTranscriptFile.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="CleanTranscriptFile" name="clean transcript file">
+<tool id="CleanTranscriptFile" name="clean transcript file" version="1.0.0">
 	<description>Clean a transcript file so that it is useable for S-MART.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -61,17 +61,11 @@
 		</data>
 
 	</outputs>
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-      <param name="type" value="No" />
-      <output name="outputFile" file="exp_cleantranscriptfile_genes.gtf" />
-    </test>
-  </tests>
 
 	<help>
 		A GFF/GTF file (please consult http://www.sequenceontology.org/gff3.shtml to know more about the GFF3 format, and http://mblab.wustl.edu/GTF22.html for the GTF format) may contain different sources of information: chromosome size, genes, transcripts, etc. S-MART mostly works on transcripts. This scripts filters the input file to keep the information you really want, based on the feature (3rd column).
 	</help>
-
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
 </tool>
--- a/SMART/galaxy/Clusterize.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/Clusterize.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,27 +1,11 @@
-<tool id="MergingDataClusterize" name="clusterize">
+<tool id="MergingDataClusterize" name="clusterize" version="1.0.0">
 	<description>Clusterize features when their genomic intervals overlap.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/clusterize.py -i $formatType.inputFileName
-		#if $formatType.FormatInputFileName == 'bed':
-			-f bed
-		#elif $formatType.FormatInputFileName == 'gff':
-			-f gff
-		#elif $formatType.FormatInputFileName == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName == 'gtf':
-			-f gtf
-		#end if
-		-o $outputFileGff 
-		$colinear
-		$normalize
-		-d $distance
+		#set $inputFiles = ",".join(["%s" % (s) for s in $formatType.inputFileName])
+		../Java/Python/clusterize.py -i $inputFiles -f $formatType.FormatInputFileName -o $outputFileGff $colinear $normalize -d $distance
 	</command>
 
 	<inputs>
@@ -33,24 +17,28 @@
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
 				<option value="gtf">gtf</option>
+				<option value="bam">bam</option>
 			</param>
 			<when value="bed">
-				<param name="inputFileName" format="bed" type="data" label="Input File"/>
+				<param name="inputFileName" format="bed" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gff">
-				<param name="inputFileName" format="gff" type="data" label="Input File"/>
+				<param name="inputFileName" format="gff" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gff2">
-				<param name="inputFileName" format="gff2" type="data" label="Input File"/>
+				<param name="inputFileName" format="gff2" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gff3">
-				<param name="inputFileName" format="gff3" type="data" label="Input File"/>
+				<param name="inputFileName" format="gff3" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="sam">
-				<param name="inputFileName" format="sam" type="data" label="Input File"/>
+				<param name="inputFileName" format="sam" type="data" multiple="true" label="Input File"/>
 			</when>
 			<when value="gtf">
-				<param name="inputFileName" format="gtf" type="data" label="Input File"/>
+				<param name="inputFileName" format="gtf" type="data" multiple="true" label="Input File"/>
+			</when>
+			<when value="bam">
+				<param name="inputFileName" format="bam" type="data" multiple="true" label="Input File"/>
 			</when>
 		</conditional>
 
@@ -63,6 +51,18 @@
 		<data name="outputFileGff" format="gff3"/>
 	</outputs> 
 
+    <tests>
+        <test>
+            <!-- basic test -->
+            <param name="FormatInputFileName" value="bed"/>
+            <param name="inputFileName" value="smart_1.bed" ftype="bed"/>
+            <param name="colinear" value="false"/>
+            <param name="normalize" value="false"/>
+            <param name="distance" value="0"/>
+            <output name="outputFileGff" file="smart_clusterize_out_1.gff3" ftype="gff3"/>
+        </test>
+    </tests>
+
 	<help>
 The script clusterizes the input genomic data. Two features are clusterized when their genomic intervals overlap. The output is a GFF3 file, where each element is a cluster. The number of elements in the cluster is given by the tag **nbElements**. The name of a cluster is the concatation of the names of its reads (like **read1--read2--read3**). Note that if the size of the name of the cluster exceeds 100 characters, it is truncated to the first 100 characters.
 
@@ -70,4 +70,9 @@
 
 By default, the tool clusterizes all features which overlap (or nearly overlap), even if they are on different strands. If you want to clusterize the features which are on the same strand only, you can specify it.
 	</help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/CollapseReads.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/CollapseReads.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="collapseReads" name="collapse reads">
+<tool id="collapseReads" name="collapse reads" version="1.0.0">
 	<description>Merges two genomic features if they have exactly the same genomic coordinates.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -15,10 +15,12 @@
 			-f gff3
 		#elif $formatType.FormatInputFileName == 'sam':
 			-f sam
+		#elif $formatType.FormatInputFileName == 'bam':
+			-f bam
 		#elif $formatType.FormatInputFileName == 'gtf':
 			-f gtf
 		#end if
-		-$strand
+		$strand
 		-o $outputFileGff 
 	</command>
 
@@ -30,6 +32,7 @@
 				<option value="gff2">gff2</option>
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
+				<option value="bam">bam</option>
 				<option value="gtf">gtf</option>
 			</param>
 			<when value="bed">
@@ -47,6 +50,9 @@
 			<when value="sam">
 				<param name="inputFileName" format="sam" type="data" label="Input File"/>
 			</when>
+			<when value="bam">
+				<param name="inputFileName" format="bam" type="data" label="Input File"/>
+			</when>
 			<when value="gtf">
 				<param name="inputFileName" format="gtf" type="data" label="Input File"/>
 			</when>
@@ -64,4 +70,9 @@
 
 This is especially useful for short RNA sequencing (where you want to count the number of read per miRNA, siRNA, etc.) or 5' capped short reads.
 	</help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/CompareOverlappingAdapt.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/CompareOverlappingAdapt.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,48 +1,18 @@
-<tool id="CompareOverlappingAdapt" name="compare overlapping">
+<tool id="CompareOverlappingAdapt" name="compare overlapping" version="1.0.0">
 	<description>Provide the queries that overlap with a reference.</description>  
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/CompareOverlappingAdapt.py -i $formatType.inputFileName1 
-		#if $formatType.FormatInputFileName1 == 'bed':  
-		-f bed
-		#elif $formatType.FormatInputFileName1 == 'gff':
-			-f gff	
-		#elif $formatType.FormatInputFileName1 == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName1 == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName1 == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName1 == 'gtf':
-			-f gtf
-		#end if
-		-j $formatType2.inputFileName2
-		#if $formatType2.FormatInputFileName2 == 'bed':
-			-g bed
-		#elif $formatType2.FormatInputFileName2 == 'gff':
-			-g gff	
-		#elif $formatType2.FormatInputFileName2 == 'gff2':
-			-g gff2
-		#elif $formatType2.FormatInputFileName2 == 'gff3':
-			-g gff3
-		#elif $formatType2.FormatInputFileName2 == 'sam':
-			-g sam
-		#elif $formatType2.FormatInputFileName2 == 'gtf':
-		    -g gtf
-		#end if
-		-o $outputFileGff 
+		../Java/Python/CompareOverlapping.py -i $formatType.inputFileName1 -f $formatType.FormatInputFileName1 -j $formatType2.inputFileName2 -g $formatType2.FormatInputFileName2 -o $outputFileGff $InvertMatch $NotOverlapping -m $NbOverlap
 		#if $OptionDistance.Dist == 'Yes':
 			-d $OptionDistance.distance
 		#end if
-		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+		#if str($OptionCA) == 'Collinear':
 			-c 
-		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+		#elif str($OptionCA) == 'AntiSense':
 			-a
 		#end if	
-		$InvertMatch
-		$NotOverlapping
 	</command>
 
 	<inputs>
@@ -114,21 +84,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<conditional name="OptionCollinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Collinear or anti-sens">
-				<option value="Collinear">Collinear</option>
-				<option value="AntiSens">AntiSens</option>
-				<option value="NONE" selected="true">NONE</option>
-			</param>
-			<when value="Collinear">
-			</when>
-			<when value="AntiSens">
-			</when>
-			<when value="NONE">
-			</when>
-		</conditional>
+		<param name="OptionCA" type="select" label="Collinear or anti-sense features only">
+			<option value="Collinear">Collinear</option>
+			<option value="AntiSense">AntiSense</option>
+			<option value="All" selected="true">All</option>
+		</param>
 		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
 		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
+		<param name="NbOverlap" type="integer" value="1" label="Min. # of overlapping nt. to declare an overlap."/>
 	</inputs>
 
 	<outputs>
@@ -150,4 +113,9 @@
 
 Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
 	</help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/CompareOverlappingSmallQuery.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/CompareOverlappingSmallQuery.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,59 +1,24 @@
-<tool id="CompareOverlappingSmallQuery" name="compare overlapping small query">
+<tool id="CompareOverlappingSmallQuery" name="compare overlapping small query" version="1.0.0">
 	<description>Provide the queries that overlap with a reference, when the query data set is small.</description>  
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
-		#if $formatType.FormatInputFileName1 == 'bed':  
-		-f bed
-		#elif $formatType.FormatInputFileName1 == 'gff':
-			-f gff	
-		#elif $formatType.FormatInputFileName1 == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName1 == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName1 == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName1 == 'gtf':
-			-f gtf
-		#end if
-		-j $formatType2.inputFileName2
-		#if $formatType2.FormatInputFileName2 == 'bed':
-			-g bed
-		#elif $formatType2.FormatInputFileName2 == 'gff':
-			-g gff	
-		#elif $formatType2.FormatInputFileName2 == 'gff2':
-			-g gff2
-		#elif $formatType2.FormatInputFileName2 == 'gff3':
-			-g gff3
-		#elif $formatType2.FormatInputFileName2 == 'sam':
-			-g sam
-		#elif $formatType2.FormatInputFileName2 == 'gtf':
-		    -g gtf
-		#end if
-		-o $outputFileGff 
+		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 -f $formatType.FormatInputFileName1 -j $formatType2.inputFileName2 -g $formatType2.FormatInputFileName2 -o $outputFileGff $InvertMatch $NotOverlapping $OptionInclusionQuery $OptionInclusionRef -m $OptionMinOverlap
 		#if $OptionDistance.Dist == 'Yes':
 			-d $OptionDistance.distance
 		#end if
-		#if $OptionMinOverlap.present == 'Yes':
-			-m $OptionMinOverlap.minOverlap
-		#end if
 		#if $OptionPcOverlapQuery.present == 'Yes':
 			-p $OptionPcOverlapQuery.minOverlap
 		#end if
 		#if $OptionPcOverlapRef.present == 'Yes':
 			-P $OptionPcOverlapRef.minOverlap
 		#end if
-		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+		#if str($OptionCA) == 'Collinear':
 			-c 
-		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+		#elif str($OptionCA) == 'AntiSense':
 			-a
 		#end if	
-		$InvertMatch
-		$NotOverlapping
-		$OptionInclusionQuery
-		$OptionInclusionRef
 	</command>
 
 	<inputs>
@@ -64,6 +29,7 @@
 				<option value="gff2">gff2</option>
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
+				<option value="bam">bam</option>
 				<option value="gtf">gtf</option>
 			</param>
 			<when value="bed">
@@ -81,9 +47,12 @@
 			<when value="sam">
 				<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
 			</when>
+			<when value="bam">
+				<param name="inputFileName1" format="bam" type="data" label="Input File 1"/>
+			</when>
 			<when value="gtf">
 				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-								                        </when>
+			</when>
 		</conditional>
 
 		<conditional name="formatType2">
@@ -93,6 +62,7 @@
 				<option value="gff2">gff2</option>
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
+				<option value="bam">bam</option>
 				<option value="gtf">gtf</option>
 			</param>
 			<when value="bed">
@@ -110,6 +80,9 @@
 			<when value="sam">
 				<param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
 			</when>
+			<when value="bam">
+				<param name="inputFileName2" format="bam" type="data" label="Input File 2"/>
+			</when>
 			<when value="gtf">
 				<param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
 			</when>
@@ -125,24 +98,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<conditional name="OptionMinOverlap">
-			<param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
-				<option value="Yes">Yes</option>
-				<option value="No" selected="true">No</option>
-			</param>
-			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
-			</when>
-			<when value="No">
-			</when>
-		</conditional>
+		<param name="OptionMinOverlap" type="integer" value="1" label="Min. # of overlapping nt. to declare an overlap."/>
 		<conditional name="OptionPcOverlapQuery">
 			<param name="present" type="select" label="N% of the query must overlap">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
@@ -153,26 +116,18 @@
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
-		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
+		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
 		<param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
-		<conditional name="OptionCollinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Collinear or anti-sens">
-				<option value="Collinear">Collinear</option>
-				<option value="AntiSens">AntiSens</option>
-				<option value="NONE" selected="true">NONE</option>
-			</param>
-			<when value="Collinear">
-			</when>
-			<when value="AntiSens">
-			</when>
-			<when value="NONE">
-			</when>
-		</conditional>
+		<param name="OptionCA" type="select" label="Collinear or anti-sense only">
+			<option value="Collinear">Collinear</option>
+			<option value="AntiSense">AntiSense</option>
+			<option value="All" selected="true">All</option>
+		</param>
 		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
 		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
 	</inputs>
@@ -200,4 +155,10 @@
 
 Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
 	</help>
+
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/CompareOverlappingSmallRef.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/CompareOverlappingSmallRef.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,59 +1,24 @@
-<tool id="CompareOverlappingSmallRef" name="compare overlapping small reference">
-	<description>Provide the queries that overlap with a reference, when the reference dataset is small.</description>  
+<tool id="CompareOverlappingSmallRef" name="compare overlapping small reference" version="1.0.0">
+	<description>Provide the queries that overlap with a reference, when the reference data set is small.</description>  
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
-		../Java/Python/CompareOverlappingSmallQuery.py -i $formatType.inputFileName1 
-		#if $formatType.FormatInputFileName1 == 'bed':  
-		-f bed
-		#elif $formatType.FormatInputFileName1 == 'gff':
-			-f gff	
-		#elif $formatType.FormatInputFileName1 == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName1 == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName1 == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName1 == 'gtf':
-			-f gtf
-		#end if
-		-j $formatType2.inputFileName2
-		#if $formatType2.FormatInputFileName2 == 'bed':
-			-g bed
-		#elif $formatType2.FormatInputFileName2 == 'gff':
-			-g gff	
-		#elif $formatType2.FormatInputFileName2 == 'gff2':
-			-g gff2
-		#elif $formatType2.FormatInputFileName2 == 'gff3':
-			-g gff3
-		#elif $formatType2.FormatInputFileName2 == 'sam':
-			-g sam
-		#elif $formatType2.FormatInputFileName2 == 'gtf':
-		    -g gtf
-		#end if
-		-o $outputFileGff 
+		../Java/Python/CompareOverlappingSmallRef.py -i $formatType.inputFileName1 -f $formatType.FormatInputFileName1 -j $formatType2.inputFileName2 -g $formatType2.FormatInputFileName2 -o $outputFileGff $InvertMatch $NotOverlapping $OptionInclusionQuery $OptionInclusionRef -m $OptionMinOverlap
 		#if $OptionDistance.Dist == 'Yes':
 			-d $OptionDistance.distance
 		#end if
-		#if $OptionMinOverlap.present == 'Yes':
-			-m $OptionMinOverlap.minOverlap
-		#end if
 		#if $OptionPcOverlapQuery.present == 'Yes':
 			-p $OptionPcOverlapQuery.minOverlap
 		#end if
 		#if $OptionPcOverlapRef.present == 'Yes':
 			-P $OptionPcOverlapRef.minOverlap
 		#end if
-		#if $OptionCollinearOrAntiSens.OptionCA == 'Collinear':
+		#if str($OptionCA) == 'Collinear':
 			-c 
-		#elif $OptionCollinearOrAntiSens.OptionCA == 'AntiSens':
+		#elif str($OptionCA) == 'AntiSense':
 			-a
 		#end if	
-		$InvertMatch
-		$NotOverlapping
-		$OptionInclusionQuery
-		$OptionInclusionRef
 	</command>
 
 	<inputs>
@@ -64,6 +29,7 @@
 				<option value="gff2">gff2</option>
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
+				<option value="bam">bam</option>
 				<option value="gtf">gtf</option>
 			</param>
 			<when value="bed">
@@ -81,9 +47,12 @@
 			<when value="sam">
 				<param name="inputFileName1" format="sam" type="data" label="Input File 1"/>
 			</when>
+			<when value="bam">
+				<param name="inputFileName1" format="bam" type="data" label="Input File 1"/>
+			</when>
 			<when value="gtf">
 				<param name="inputFileName1" format="gtf" type="data" label="Input File 1"/>
-								                        </when>
+			</when>
 		</conditional>
 
 		<conditional name="formatType2">
@@ -93,6 +62,7 @@
 				<option value="gff2">gff2</option>
 				<option value="gff3">gff3</option>
 				<option value="sam">sam</option>
+				<option value="bam">bam</option>
 				<option value="gtf">gtf</option>
 			</param>
 			<when value="bed">
@@ -110,6 +80,9 @@
 			<when value="sam">
 				<param name="inputFileName2" format="sam" type="data" label="Input File 2"/>
 			</when>
+			<when value="bam">
+				<param name="inputFileName2" format="bam" type="data" label="Input File 2"/>
+			</when>
 			<when value="gtf">
 				<param name="inputFileName2" format="gtf" type="data" label="Input File 2"/>
 			</when>
@@ -125,24 +98,14 @@
 			<when value="No">
 			</when>
 		</conditional>
-		<conditional name="OptionMinOverlap">
-			<param name="present" type="select" label="Minimum number of common nucleotides to declare an overlap">
-				<option value="Yes">Yes</option>
-				<option value="No" selected="true">No</option>
-			</param>
-			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
-			</when>
-			<when value="No">
-			</when>
-		</conditional>
+		<param name="OptionMinOverlap" type="integer" value="1" label="Min. # of overlapping nt. to declare an overlap."/>
 		<conditional name="OptionPcOverlapQuery">
 			<param name="present" type="select" label="N% of the query must overlap">
 				<option value="Yes">Yes</option>
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
@@ -153,26 +116,18 @@
 				<option value="No" selected="true">No</option>
 			</param>
 			<when value="Yes">
-				<param name="minOverlap" type="integer" value="0"/>
+				<param name="minOverlap" type="integer" value="100"/>
 			</when>
 			<when value="No">
 			</when>
 		</conditional>
-		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a query"/>
+		<param name="OptionInclusionQuery" type="boolean" truevalue="-k" falsevalue="" checked="false" label="The query must be nested in a reference"/>
 		<param name="OptionInclusionRef" type="boolean" truevalue="-K" falsevalue="" checked="false" label="The reference must be nested in a query"/>
-		<conditional name="OptionCollinearOrAntiSens">
-			<param name="OptionCA" type="select" label="Collinear or anti-sens">
-				<option value="Collinear">Collinear</option>
-				<option value="AntiSens">AntiSens</option>
-				<option value="NONE" selected="true">NONE</option>
-			</param>
-			<when value="Collinear">
-			</when>
-			<when value="AntiSens">
-			</when>
-			<when value="NONE">
-			</when>
-		</conditional>
+		<param name="OptionCA" type="select" label="Collinear or anti-sense only">
+			<option value="Collinear">Collinear</option>
+			<option value="AntiSense">AntiSense</option>
+			<option value="All" selected="true">All</option>
+		</param>
 		<param name="InvertMatch" type="boolean" truevalue="-x" falsevalue="" checked="false" label="Invert match: the output file will contain all query elements which do NOT overlap"/>
 		<param name="NotOverlapping" type="boolean" truevalue="-O" falsevalue="" checked="false" label="Also report the query data which do not overlap, with the nbOverlaps tag set to 0."/>
 	</inputs>
@@ -200,4 +155,9 @@
 
 Some option reverses the selection. Put in other words, it performs the comparison as usual, and outputs all those query data which do not overlap.
 	</help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/ConvertTranscriptFile.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/ConvertTranscriptFile.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,35 +1,16 @@
-<tool id="ConvertTranscriptFile" name="convert transcript file">
+<tool id="ConvertTranscriptFile" name="convert transcript file" version="1.0.0">
   <description>Convert a file from a format to another.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
-  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFormatType.inputFileName 
-	  #if $inputFormatType.FormatInputFileName == 'gff3':
-	  	-f gff3
-	  #elif $inputFormatType.FormatInputFileName == 'bed': 
-	 	-f bed 
-	  #elif $inputFormatType.FormatInputFileName == 'bam': 
-	 	-f blast
-	  #elif $inputFormatType.FormatInputFileName == 'sam': 
-	 	-f sam
-	  #elif $inputFormatType.FormatInputFileName == 'gtf': 
-	 	-f gtf
-          #end if
-	  
-	 -g $outputFormatType.outFormat
-	  
-	  -n $name
-	  $strand
-	  -o $outputFile
-	 
-  </command>
+  <command interpreter="python"> ../Java/Python/convertTranscriptFile.py -i $inputFormatType.inputFileName -f $inputFormatType.FormatInputFileName -g $outputFormatType.outFormat -n $name $strand -o $outputFile </command>
   <inputs>
 	  <conditional name="inputFormatType">
 		  <param name="FormatInputFileName"  type="select" label="Input File Format">
 			  <option value="gff3">GFF3</option> 
 			  <option value="bed">BED</option> 
+			  <option value="sam">SAM</option> 
 			  <option value="bam">BAM</option> 
-			  <option value="sam">SAM</option> 
 			  <option value="gtf">GTF</option> 
 		  </param>
 		  <when value="gff3">  
@@ -38,12 +19,12 @@
 		  <when value="bed">  
 			  <param name="inputFileName" format="bed" type="data" label="Input File"/>
 		  </when>
+		  <when value="sam">  
+			  <param name="inputFileName" format="sam" type="data" label="Input File"/>
+		  </when>
 		  <when value="bam">  
 			  <param name="inputFileName" format="bam" type="data" label="Input File"/>
 		  </when>
-		  <when value="sam">  
-			  <param name="inputFileName" format="sam" type="data" label="Input File"/>
-		  </when>
 		  <when value="gtf">  
 			  <param name="inputFileName" format="gtf" type="data" label="Input File"/>
 		  </when>
@@ -95,4 +76,9 @@
   <help>
 Simple conversion tool.
   </help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/CountReadGCPercent.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/CountReadGCPercent.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="CountReadGCPercent" name="count read GC percent">
+<tool id="CountReadGCPercent" name="count read GC percent" version="1.0.0">
     <description>Count GC percent for each read against a genome.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -16,5 +16,9 @@
   <help>
 Count the GC% of a FASTA file.
   </help>
+
+  <citations>
+    <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
+
 </tool>
-
--- a/SMART/galaxy/GetDifferentialExpression.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/GetDifferentialExpression.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="GetDifferentialExpression" name="get differential expression">
+<tool id="GetDifferentialExpression" name="get differential expression" version="1.0.0">
 	<description>Get the differential expression between 2 conditions using Fisher's exact test, on regions defined by a third file.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -211,4 +211,9 @@
 
 In most cases, the sizes of the two input samples will be different, so you should probably normalize the data, which is an available option. The ---rather crude--- normalization increases the number of data in the least populated sample and decreases the number of data in the most populated sample to the average number of data.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/GetFlanking.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/GetFlanking.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="GetFlanking" name="get flanking">
+<tool id="GetFlanking" name="get flanking" version="1.0.0">
 	<description>Get the flanking regions of a set of reference.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -190,5 +190,9 @@
 
 Notice that elements from the second sets may be printed at most once, whether they are the flanking element of several elements from the first or not.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
 	
 </tool>
--- a/SMART/galaxy/SelectByTag.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/SelectByTag.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="SelectByTag" name="select by tag">
+<tool id="SelectByTag" name="select by tag" version="1.0.0">
 	<description>Keep the genomic coordinates such that a value of a given tag.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -119,4 +119,9 @@
 
 This tool can be used to select the clusters with a minimum number of elements (the tag **nbElements** counts the number of elements per clusters) or to select the reads which have mapped less than *n* times (the tag **nbOccurrences** counts the number of mappings per read).
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/WrappGetLetterDistribution.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/WrappGetLetterDistribution.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getLetterDistribution1" name="get letter distribution">
+<tool id="getLetterDistribution1" name="get letter distribution" version="1.0.0">
     <description>Calculate distribution for each nucleotide per position for all short reads</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -32,17 +32,15 @@
                	<data name="ouputFileNamePNG1" format="png" label="[get letter distribution] PNG file 1"/>
                	<data name="ouputFileNamePNG2" format="png" label="[get letter distribution] PNG file 2"/>
     </outputs>
-    <tests>
-    	<test>
-            <param name="FormatInputFileName" value="fastq" />
-            <param name="inputFileName" value="short_fastq.fastq" />
-            <output name="outputFileNameCSV" file="exp_getletterdistribution_short_fastq.csv" />     
-        </test>
-    </tests>
 
 	<help>
 The script gets the nucleotide distribution of the input sequence list. It outputs two files. The first file shows the nucleotide distribution of the data. More precisely, a point (*x*, *y*) on the curve **A** shows that *y* sequences have *x* % of **A**.
   
 The second plot shows the average nucleotide distribution for each position of the read. You can use it to detect a bias in the first nucleotides, for instance. A point *x*, *y* on the curve **A** shows that at the position *x*, there are *y*% of **A**. A point (*x*, *y*) on the curve **#** tells you that *y* % of the sequences contain not less than *x* nucleotides. By definition, this latter line is a decreasing function. It usually explains why the tail of the other curves are sometimes erratic: there are few sequences.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/changeGffFeatures.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/changeGffFeatures.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="changeGffFeatures" name="change GFF features">
+<tool id="changeGffFeatures" name="change GFF features" version="1.0.0">
 	<description>Change a feature in a GFF file (the feature is the 3rd column).</description>
 		<requirements>
 			<requirement type="set_environment">PYTHONPATH</requirement>
@@ -19,5 +19,7 @@
 	  <help>
 		This script changes the third column of a GFF3 file (please refer to http://www.sequenceontology.org/gff3.shtml to know more about this format).
 	  </help>
+	  <citations>
+		  <citation type="doi">10.1371/journal.pone.0025988</citation>
+	  </citations>
 </tool>
-
--- a/SMART/galaxy/changeTagName.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/changeTagName.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="changeTagName" name="change tag name">
+<tool id="changeTagName" name="change tag name" version="1.0.0">
 	<description>Change the name of a tag in a GFF file.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -48,4 +48,8 @@
 	<help>
 		Change the name of a tag in the 9th field of a GFF3 file (please consult http://www.sequenceontology.org/gff3.shtml to know more about this format).
 	</help>
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/clusterizeBySlidingWindows.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/clusterizeBySlidingWindows.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,28 +1,11 @@
-<tool id="clusterizeBySlidingWindows" name="clusterize by sliding windows">
+<tool id="clusterizeBySlidingWindows" name="clusterize by sliding windows" version="1.0.0">
 	<description>Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region).</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
 	<command interpreter="python">
 		#set $inputFiles = ",".join(["%s" % (s) for s in $formatType.inputFileName])
-		../Java/Python/clusterizeBySlidingWindows.py -i $inputFiles
-		#if $formatType.FormatInputFileName == 'bed':
-			-f bed
-		#elif $formatType.FormatInputFileName == 'gff':
-			-f gff
-		#elif $formatType.FormatInputFileName == 'gff2':
-			-f gff2
-		#elif $formatType.FormatInputFileName == 'gff3':
-			-f gff3
-		#elif $formatType.FormatInputFileName == 'sam':
-			-f sam
-		#elif $formatType.FormatInputFileName == 'gtf':
-			-f gtf
-		#end if
-		-s $size
-		-e $overlap
-		-o $outputFileGff 
-		$strands
+		../Java/Python/clusterizeBySlidingWindows.py -i $inputFiles -f $formatType.FormatInputFileName -s $size -e $overlap -o $outputFileGff $strands
 		
 		#if $OptionTag.tag == "Yes":
 			-g $OptionTag.value
@@ -135,4 +118,9 @@
 
 The output file is a GFF3 file, where each element is a window. There is a special tag for each window, whose name is **nbElements** if you counted the number of transcripts per sliding window. However, if you performed a **min** (resp. **max**, **sum**, **median**, **average**) operation on the tags **value** of the transcripts, then the tag of the window will be **minValue** (resp. **maxValue**, **sumValue**, **medValue**, **avgValue**). You can also specify the name of your tag (which is actually advised: **nbReadsInSample1** will always be more informative than **nbElements**).
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/computeCoverage.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/computeCoverage.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="ComputeCoverage" name="compute coverage">
+<tool id="ComputeCoverage" name="compute coverage" version="1.0.0">
     <description>Compute the coverage of a set with respect to another set.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -109,5 +109,9 @@
 	<help>
 This tool considers a query and a reference files, and gives the coverage of the query file by the reference. The output file is similar to the query file, where a tag **coverage** has been added.
 	</help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
-
--- a/SMART/galaxy/coordinatesToSequence.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/coordinatesToSequence.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="coordinatesToSequence" name="coordinates to sequence">
+<tool id="coordinatesToSequence" name="coordinates to sequence" version="1.0.0">
 	<description>Coordinates to Sequences: Extract the sequences from a list of coordinates.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -65,4 +65,9 @@
 	<help>
 You can use this tool, if you just want to convert your mapping data to genomic coordinates, without any filtering. It requires a genomic coordinates file together with its format, an output format (GFF3, BED, etc...), the genome, and prints you the corresponding file.
 	</help>
+
+    <citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getDifference.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getDifference.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getDifference" name="get difference">
+<tool id="getDifference" name="get difference" version="1.0.0">
 	<description>Gets all the regions of the genome, except the one given in an annotation file. Alternatively, it may also give all the elements from the first set which does not ovelap with the second set (at the nucleotide level).</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -130,4 +130,9 @@
 
 When only one set of transcripts is given, together with a reference genome, it produces a list of transcripts which complements the first set.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getDistance.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getDistance.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="GetDistance" name="get distance">
+<tool id="GetDistance" name="get distance" version="1.0.0">
 	<description>Give the distances between every data from the first input set with respect to the data from the second input set.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -244,4 +244,8 @@
 You have several options for the output plot. You can first choose the region on the *x*-axis you want to plot. You can also display histograms instead of line plot. In this case, the data are summed into buckets, whose sizes are given as an option. For instance, a bucket of size *s* at the point (*x*, *y*) means that there are *y* pairs of elements which are separated by *x* to *x + s* nucleotides.
 	</help>
 
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getDistribution.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getDistribution.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getDistribution" name="get distribution">
+<tool id="getDistribution" name="get distribution" version="1.0.0">
 	<description>Get Distribution: Get the distribution of the genomic coordinates along a genome.</description>
     <requirements>
         <requirement type="set_environment">PYTHONPATH</requirement>
@@ -234,4 +234,9 @@
 
 If you want, you can also plot a specific region, by mentionning the chromosome, the start and the end positions of the region.
     </help>	
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getExons.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getExons.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getExons" name="get exons">
+<tool id="getExons" name="get exons" version="1.0.0">
     <description>Get the exons of a set of transcripts.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -72,16 +72,13 @@
     <outputs>
         <data format="gff3" name="outputFileGff" label="[get exons] output file"/>       
     </outputs> 
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-      <param name="Value" value="No"/>
-      <output name="outputFileGff" file="exp_getExons.gff3" />
-    </test>
-</tests>
 
 	<help>
 Provide all the exons of an annotation file.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getIntrons.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getIntrons.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getIntrons" name="get introns">
+<tool id="getIntrons" name="get introns" version="1.0.0">
     <description>Get the introns of a set of transcripts.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -56,16 +56,13 @@
     <outputs>
         <data format="gff3" name="outputFileGff" label="[get introns] output file"/>       
     </outputs> 
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-      <output name="outputFileGff" file="exp_getIntrons.gff3" />
-    </test>
-  </tests>
 
 	<help>
 Provide all the introns of an annotation file.
 	</help>
 
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+						 
 </tool>
--- a/SMART/galaxy/getReadDistribution.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getReadDistribution.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getReadDistribution" name="get read distribution">
+<tool id="getReadDistribution" name="get read distribution" version="1.0.0">
 	<description>Get Read Distribution v1.0.1: Plot the number of identical reads and give the most represented.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -67,4 +67,9 @@
     <help>
         This script gives a .tar out file, if you want to take look at the results, you have to download it.
     </help>	
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getSizes.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getSizes.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="GetSizes" name="get sizes">
+<tool id="GetSizes" name="get sizes" version="1.0.0">
 	<description>Get the sizes of a set of genomic coordinates.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -133,4 +133,9 @@
 
 When your mapping include exon/intron structures, you can decide to count the size of the introns, the sizes of the exons or the size of the first exons.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getWigData.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getWigData.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getWigData" name="get WIG data">
+<tool id="getWigData" name="get WIG data" version="1.0.0">
     <description>Compute the average data for some genomic coordinates using WIG files</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -25,4 +25,9 @@
 
 You can then plot your data using *plotTranscriptList.py*.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getWigDistance.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getWigDistance.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getWigDistance" name="get WIG distance">
+<tool id="getWigDistance" name="get WIG distance" version="1.0.0">
     <description>Compute the average data around some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -25,4 +25,9 @@
 
 You can possibly use a log scale for the *y*-axis.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/getWigProfile.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/getWigProfile.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="getWigProfile" name="get WIG profile">
+<tool id="getWigProfile" name="get WIG profile" version="1.0.0">
 	<description>Compute the average profile of some genomic coordinates using WIG files (thus covering a large proportion of the genome).</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -75,4 +75,9 @@
 
 The main inputs of the functions are a file containing a list of transcripts (or any sets of genomic interval) and a directory containing a set of WIG files (one file per chromosome, or one file per chromosome and per strand). The function then computes the WIG profile of each transcript. The user can also define a region around the transcripts that should also be plotted (in this case, the profile will include the WIG values which overlap with the transcript as well as the 5' and 3' regions). Since the transcript do not necessarily have the same sizes, all profiles will be extended or shrinked to fit in a size which is given by the user. If the resulting profile is a bit bumpy, the user can also smoothen the curve by using a linear smoothing function (the size of the smoothing window is given by the user). Finally, the user may want to plot the WIG data for the opposite strand too (if the strand specific WUG data are available).
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/mapperAnalyzer.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/mapperAnalyzer.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="mapperAnalyzer" name="mapper analyzer">
+<tool id="mapperAnalyzer" name="mapper analyzer" version="1.0.0">
 	<description>Read the output of an aligner, print statistics and possibly translate into GFF, BED or GBrowse formats. </description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -202,4 +202,9 @@
 
 By default, any gap in the alignment to the reference sequence is treated like an exon. You can decide to remove this feature by merging short introns (actually, gaps).
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/mergeSlidingWindowsClusters.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/mergeSlidingWindowsClusters.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="mergeSlidingWindowsClusters" name="merge sliding windows clusters">
+<tool id="mergeSlidingWindowsClusters" name="merge sliding windows clusters" version="1.0.0">
 	<description>Merges two files containing the results of a sliding windows clustering.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -107,4 +107,9 @@
 Sliding windows are also useful to compare two (or more!) sets of data. This can be very valuable when you want to compare differential expression in two different conditions. When you have two different sliding windows sets, this function merges them into one, where each window contains the two pieces of information. You may want to plot the data afterwards using the *plot transcript list* function.
 	</help>
 	
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/mergeTranscriptLists.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/mergeTranscriptLists.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="mergeTranscriptLists" name="merge transcript lists">
+<tool id="mergeTranscriptLists" name="merge transcript lists" version="1.0.0">
 	<description>Merge the elements of two lists of genomic coordinates.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -150,4 +150,9 @@
 
 Optionally, you can also add to the output all the elements from the first set which do not overlap with the second set.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/modifyGenomicCoordinates.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/modifyGenomicCoordinates.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="modifyGenomicCoordinates" name="modify genomic coordinates">
+<tool id="modifyGenomicCoordinates" name="modify genomic coordinates" version="1.0.0">
   <description>Extend or shrink a list of genomic coordinates.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -135,4 +135,9 @@
 
 The tool needs a transcript file, its format, and outputs a new transcript file.
   </help>
+
+  <citations>
+	  <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
+
 </tool>
--- a/SMART/galaxy/modifySequenceList.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/modifySequenceList.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="modifySequenceList" name="modify sequence list">
+<tool id="modifySequenceList" name="modify sequence list" version="1.0.0">
   <description>Extend or shring a list of sequences. </description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -49,4 +49,9 @@
   <help>
   This tool reads a list of sequences (in multi-FASTA/Q format) that you provide and shrinks each sequence to the *n* first nucleotides or the *n* last nucleotides.
   </help>
+
+  <citations>
+	  <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
+
 </tool>
--- a/SMART/galaxy/plotCoverage.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/plotCoverage.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="plotCoverage" name="plot coverage">
+<tool id="plotCoverage" name="plot coverage" version="1.0.0">
 	<description>Plot the coverage of the first data with respect to the second one.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -271,4 +271,9 @@
 
 This script gives a .tar out file, if you want to take look at the results, you have to download it.
     </help>		
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/plotTranscriptList.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/plotTranscriptList.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="plotTranscriptList" name="plot transcript list">
+<tool id="plotTranscriptList" name="plot transcript list" version="1.0.0">
 	<description>Plot some information from a list of transcripts. </description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -132,4 +132,9 @@
 
 If you use a cloud plot, you can compute the Spearman's rho to quantify a correlation between your two tag values.
 	</help>
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/removeExonLines.sh	Thu Jan 30 08:55:29 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-#!/bin/bash
-sed '/exon/d' $1
--- a/SMART/galaxy/removeExonLines.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/removeExonLines.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,18 +1,23 @@
-<tool id="removeExonLines" name="remove exon lines">
-  <description>Removes the lines containing Exon.</description>
+<tool id="removeExonLines" name="remove exon lines" version="1.0.0">
+  <description>Removes the introns of the transcript files.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
 	</requirements>
-  <command interpreter="sh"> ../Java/Python/removeExonLines.sh $inputFile > $outputFile  </command>
+  <command interpreter="python"> ../Java/Python/RemoveExons.py -i $inputFile -f gff3 -o $outputFile  </command>
   <inputs>
-    <param name="inputFile" type="data" label="Input File" format="gff3"/>
+		<param name="inputFile" format="gff3" type="data" label="Input File"/>
   </inputs>
 
   <outputs>
-    <data format="gff3" name="outputFile" label="[remove exon line] output file"/>
+    <data format="gff3" name="outputFile" label="[remove introns] output file"/>
   </outputs>
 
   <help>
-	command example: sh removeExonLines.sh input.gff3
+	Simply removes the introns from the transcripts of the input file, thus producing the unmatured transcripts.
   </help>
+
+  <citations>
+	  <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
+
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/removeIntrons.xml	Fri May 20 09:25:40 2016 -0400
@@ -0,0 +1,50 @@
+<tool id="removeIntrons" name="remove introns" version="1.0.0">
+  <description>Removes the introns of the transcript files.</description>
+	<requirements>
+		<requirement type="set_environment">PYTHONPATH</requirement>
+	</requirements>
+  <command interpreter="python"> ../Java/Python/RemoveExons.py -i $formatType.inputFileName -f $formatType.FormatInputFileName -o $outputFile  </command>
+  <inputs>
+		<conditional name="formatType">
+			<param name="FormatInputFileName" type="select" label="query File Format">
+				<option value="bed">bed</option>
+				<option value="gff">gff</option>
+				<option value="gff2">gff2</option>
+				<option value="gff3">gff3</option>
+				<option value="sam">sam</option>
+				<option value="gtf">gtf</option>
+			</param>
+			<when value="bed">
+				<param name="inputFileName" format="bed" type="data" label="Input File"/>
+			</when>
+			<when value="gff">
+				<param name="inputFileName" format="gff" type="data" label="Input File"/>
+			</when>
+			<when value="gff2">
+				<param name="inputFileName" format="gff2" type="data" label="Input File"/>
+			</when>
+			<when value="gff3">
+				<param name="inputFileName" format="gff3" type="data" label="Input File"/>
+			</when>
+			<when value="sam">
+				<param name="inputFileName" format="sam" type="data" label="Input File"/>
+			</when>
+			<when value="gtf">
+				<param name="inputFileName" format="gtf" type="data" label="Input File"/>
+			</when>
+		</conditional>
+  </inputs>
+
+  <outputs>
+    <data format="gff3" name="outputFile" label="[remove introns] output file"/>
+  </outputs>
+
+  <help>
+	Simply removes the introns from the transcripts of the input file, thus producing the unmatured transcripts.
+  </help>
+
+  <citations>
+	  <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
+
+</tool>
--- a/SMART/galaxy/restrictFromSize.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/restrictFromSize.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="restrictFromSize" name="restrict from size">
+<tool id="restrictFromSize" name="restrict from size" version="1.0.0">
 	<description>Select the elements of a list of sequences or transcripts with a given size.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -99,4 +99,9 @@
 Reads a list of sequences or genomic coordinates and outputs those which are longer and / or shorter than a given size ---which you provide.
 	</help>
 
+
+	<citations>
+		<citation type="doi">10.1371/journal.pone.0025988</citation>
+	</citations>
+
 </tool>
--- a/SMART/galaxy/restrictTranscriptList.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/restrictTranscriptList.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="restrictTranscriptList" name="restrict transcript list">
+<tool id="restrictTranscriptList" name="restrict transcript list" version="1.0.0">
   <description>Select the features which are located in a given locus.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -109,16 +109,9 @@
   <help>
 Reads a list of genomic coordinates and outputs those which on a given chromosome and / or between two given positions.
   </help>
-<tests>
-    <test>
-      <param name="FormatInputFileName" value="gtf" />
-      <param name="inputFileName" value="genes.gtf" />
-	<param name="Chrom" value="Yes"/>
-	<param name="ChromName" value="I"/>
-	<param name="start" value="No" />
-<param name="end" value="No" />
-      <output name="outputFile" file="exp_restrictTranscriptList.gff3" />
-    </test>
-  </tests>
+
+  <citations>
+	  <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
 
 </tool>
--- a/SMART/galaxy/trimSequences.xml	Thu Jan 30 08:55:29 2014 -0500
+++ b/SMART/galaxy/trimSequences.xml	Fri May 20 09:25:40 2016 -0400
@@ -1,4 +1,4 @@
-<tool id="trimSequences" name="trim sequences">
+<tool id="trimSequences" name="trim sequences" version="1.0.0">
   <description>Remove the 5' and/or 3' adapters of a list of reads.</description>
 	<requirements>
 		<requirement type="set_environment">PYTHONPATH</requirement>
@@ -66,17 +66,8 @@
   <help>
 This function removes the adaptor from the 5' or 3' end of your reads. It can even recognize the adaptators which are partially present. You can specify whether you are ready to accept indels or not.
   </help>
-  <tests>
-	<test>
- 		<param name="inputFile" value="short_fastq.fastq" />
- 		<param name="FPADP" value="Yes"/>
-		<param name="fivePAdaptor" value="AAAA" />
-		<param name="TPADP" value="No"/>
-		<param name="errors" value="1"/>
-		<param name="indels" value="False"/>
-		<param name="noAdaptor5p" value="False"/>
-		<param name= "noAdaptor3p" value="False"/>
-		<output name="outputFile" file="exp_trimsequences_short_fastq.fastq" />
-	</test>
-  </tests>
+  <citations>
+    <citation type="doi">10.1371/journal.pone.0025988</citation>
+  </citations>
+
 </tool>
Binary file commons/__init__.pyc has changed
Binary file commons/core/__init__.pyc has changed
Binary file commons/core/checker/RepetException.pyc has changed
Binary file commons/core/checker/__init__.pyc has changed
Binary file commons/core/coord/Align.pyc has changed
Binary file commons/core/coord/Map.pyc has changed
Binary file commons/core/coord/Range.pyc has changed
Binary file commons/core/coord/__init__.pyc has changed
Binary file commons/core/parsing/.BamParser.py.swp has changed
Binary file commons/core/parsing/AxtParser.pyc has changed
Binary file commons/core/parsing/BamParser.pyc has changed
Binary file commons/core/parsing/BedParser.pyc has changed
Binary file commons/core/parsing/BlastParser.pyc has changed
Binary file commons/core/parsing/BowtieParser.pyc has changed
Binary file commons/core/parsing/CoordsParser.pyc has changed
Binary file commons/core/parsing/ElandParser.pyc has changed
Binary file commons/core/parsing/ExoParser.pyc has changed
Binary file commons/core/parsing/FastaParser.pyc has changed
Binary file commons/core/parsing/FastqParser.pyc has changed
Binary file commons/core/parsing/GffParser.pyc has changed
Binary file commons/core/parsing/GtfParser.pyc has changed
Binary file commons/core/parsing/MapParser.pyc has changed
Binary file commons/core/parsing/MapperParser.pyc has changed
Binary file commons/core/parsing/MaqParser.pyc has changed
Binary file commons/core/parsing/NCListParser.pyc has changed
Binary file commons/core/parsing/ParserChooser.pyc has changed
Binary file commons/core/parsing/PklParser.pyc has changed
Binary file commons/core/parsing/PslParser.pyc has changed
Binary file commons/core/parsing/RmapParser.pyc has changed
Binary file commons/core/parsing/SamParser.pyc has changed
Binary file commons/core/parsing/SeqmapParser.pyc has changed
Binary file commons/core/parsing/SequenceListParser.pyc has changed
Binary file commons/core/parsing/ShrimpParser.pyc has changed
Binary file commons/core/parsing/Soap2Parser.pyc has changed
Binary file commons/core/parsing/SoapParser.pyc has changed
Binary file commons/core/parsing/TranscriptListParser.pyc has changed
Binary file commons/core/parsing/WigParser.pyc has changed
Binary file commons/core/parsing/__init__.pyc has changed
Binary file commons/core/seq/Bioseq.pyc has changed
Binary file commons/core/seq/__init__.pyc has changed
Binary file commons/core/writer/BedWriter.pyc has changed
Binary file commons/core/writer/CsvWriter.pyc has changed
Binary file commons/core/writer/EmblWriter.pyc has changed
Binary file commons/core/writer/FastaWriter.pyc has changed
Binary file commons/core/writer/FastqWriter.pyc has changed
Binary file commons/core/writer/GbWriter.pyc has changed
Binary file commons/core/writer/Gff2Writer.pyc has changed
Binary file commons/core/writer/Gff3Writer.pyc has changed
Binary file commons/core/writer/GtfWriter.pyc has changed
Binary file commons/core/writer/MapWriter.pyc has changed
Binary file commons/core/writer/MySqlTranscriptWriter.pyc has changed
Binary file commons/core/writer/SamWriter.pyc has changed
Binary file commons/core/writer/SequenceListWriter.pyc has changed
Binary file commons/core/writer/TranscriptListWriter.pyc has changed
Binary file commons/core/writer/TranscriptWriter.pyc has changed
Binary file commons/core/writer/UcscWriter.pyc has changed
Binary file commons/core/writer/WigWriter.pyc has changed
Binary file commons/core/writer/WriterChooser.pyc has changed
Binary file commons/core/writer/__init__.pyc has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/smart_1.bed	Fri May 20 09:25:40 2016 -0400
@@ -0,0 +1,2 @@
+chr1	1000	5000	test1
+chr1	2000	6000	test2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/smart_clusterize_out_1.gff3	Fri May 20 09:25:40 2016 -0400
@@ -0,0 +1,1 @@
+chr1	S-MART	transcript	1000	5999	.	+	.	nbElements=2.000000;ID=test2--test1;Name=test2--test1