# HG changeset patch
# User mzytnicki
# Date 1391090080 18000
# Node ID 23ace8a3e22ccecd0dbda75e94441d402fa6f854
# Parent ff66073289423b4a31542f5dca213669fe4d028e
Uploaded
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/__init__.pyc
Binary file SMART/Java/Python/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/clusterizeBySlidingWindows.py
--- a/SMART/Java/Python/clusterizeBySlidingWindows.py Fri Jan 10 09:04:03 2014 -0500
+++ b/SMART/Java/Python/clusterizeBySlidingWindows.py Thu Jan 30 08:54:40 2014 -0500
@@ -28,317 +28,196 @@
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
-import re
-from commons.core.writer.WriterChooser import WriterChooser
"""
Cluster the data into regions (defined by size and overlap with next region) and keep only highest peaks.
"""
-import os, os.path
+import os, os.path, re
from optparse import OptionParser
from SMART.Java.Python.structure.Transcript import Transcript
-from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
-from SMART.Java.Python.misc.RPlotter import RPlotter
from SMART.Java.Python.misc.Progress import Progress
+from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress
+from commons.core.parsing.ParserChooser import ParserChooser
+from commons.core.writer.WriterChooser import WriterChooser
from commons.core.writer.Gff3Writer import Gff3Writer
class ClusterizeBySlidingWindows(object):
- def __init__(self, verbosity = 0):
- self.verbosity = verbosity
- self.strands = (0, )
- self.normalize = False
- self.plot = None
- self.excel = None
- self.outputFileName = ''
- self.defaultValue = None
-
- def __del__(self):
- pass
-
- def setInputFile(self, fileName, format):
- self.parser = TranscriptContainer(fileName, format, self.verbosity)
-
- def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"):
- writerChooser = WriterChooser(self.verbosity)
- writerChooser.findFormat(format)
- self.writer = writerChooser.getWriter(fileName)
- self.writer.setTitle(title)
- self.writer.setFeature(feature)
- self.writer.setFeaturePart(featurePart)
-# self.outputFileName = fileName
-# self.outputFormat = format
-
- def setWindowSize(self, size):
- self.size = size
+    def __init__(self, verbosity = 0):  # Give every attribute read by the other methods a default, so none of them can hit AttributeError.
+        self.verbosity = verbosity  # trace level handed to the parser/writer choosers and to the progress display
+        self.strands = (0, )  # a single merged pseudo-strand 0 until setBothStrands(True) switches to (-1, 1)
+        self.outputFileName = ''
+        self.defaultValue = self.tag = self.operation = None  # operation used to stay undefined until setOperation() ran
+        self.size = self.overlap = self.outputTagNames = None  # checkOptions() and setInputFile() compare these against None
+        self.valuesPerStrand = {}  # strand -> chromosome -> bin index -> output tag name -> list of collected values
- def setWindowOverlap(self, overlap):
- self.overlap = overlap
-
- def setTag(self, tag):
- self.tag = tag
-
- def setOperation(self, operation):
- self.operation = operation
-
- def setBothStrands(self, bothStrands):
- if bothStrands:
- self.strands = (-1, 1)
+    def setInputFile(self, fileNames, format):  # Open one parser per input file and store them in self.parsers, keyed by output tag name.
+        if getattr(self, "outputTagNames", None) is None:  # no explicit tag names: derive one per input file
+            operation = getattr(self, "operation", None)
+            prefix = "nbElements" if operation is None else operation.lower()
+            self.outputTagNames = ["nbElements"] if len(fileNames) == 1 else ["%s%s" % (prefix, os.path.splitext(os.path.basename(fileName))[0].title()) for fileName in fileNames]
+        if len(self.outputTagNames) != len(fileNames) or len(set(self.outputTagNames)) != len(fileNames):  # dict(zip(...)) below would silently drop files
+            raise Exception("Need exactly one distinct output tag per input file (got %d tags for %d files)! Aborting..." % (len(self.outputTagNames), len(fileNames)))
+        parserChooser = ParserChooser(self.verbosity)
+        parserChooser.findFormat(format)
+        self.parsers = dict(zip(self.outputTagNames, [parserChooser.getParser(fileName) for fileName in fileNames]))
- def setNormalize(self, normalize):
- self.normalize = normalize
-
- def setPlot(self, plot):
- self.plot = plot
+ def setOutputFileName(self, fileName, format="gff", title="S-MART", feature="transcript", featurePart="exon"): # Obtain a writer for the requested output format and keep it in self.writer.
+ writerChooser = WriterChooser(self.verbosity)
+ writerChooser.findFormat(format)
+ self.writer = writerChooser.getWriter(fileName)
+ self.writer.setTitle(title)
+ self.writer.setFeature(feature)
+ self.writer.setFeaturePart(featurePart)
- def setExcel(self, excel):
- self.excel = excel
+ def setWindowSize(self, size): # Store the window length; read by getBinsFromPos() and getPosFromBin().
+ self.size = size
- def setOutputTag(self, tag):
- self.outputTagName = tag
-
- def setDefaultValue(self, defaultValue):
- self.defaultValue = defaultValue
+ def setWindowOverlap(self, overlap): # Store the overlap between consecutive windows; the window step used elsewhere is size - overlap.
+ self.overlap = overlap
- def checkOptions(self):
-# if self.operation != None:
-# raise Exception("Trying to combine the values without specifying tag! Aborting...")
- if self.operation != None and self.operation not in ("sum", "avg", "med", "min", "max"):
- raise Exception("Do not understand tag '%s'! Aborting..." % (self.operation))
+ def setTag(self, tag): # Name of the input tag whose values are collected; with None, setBins() uses getNbElements() instead.
+ self.tag = tag
+
+ def setOperation(self, operation): # Aggregation applied per window by aggregateData(); None is treated like "sum" there.
+ self.operation = operation
- def getChromosomeSizes(self):
- self.sizes = {}
- progress = Progress(self.parser.getNbTranscripts(), "Getting sizes in genome", self.verbosity)
- for transcript in self.parser.getIterator():
- self.sizes[transcript.getChromosome()] = max(transcript.getStart(), self.sizes.get(transcript.getChromosome(), 0))
- progress.inc()
- progress.done()
+ def setBothStrands(self, bothStrands): # When true, handle strands -1 and 1 separately; otherwise the value set in __init__ is kept.
+ if bothStrands:
+ self.strands = (-1, 1)
- def getBinsFromPos(self, pos):
- bin = (pos - 1) / (self.size - self.overlap)
- if bin >= 1 and pos <= bin * (self.size - self.overlap) + self.overlap:
- return (bin - 1, bin)
- return (bin, )
-
- def getPosFromBin(self, bin):
- return (bin * (self.size - self.overlap) + 1, bin * (self.size - self.overlap) + self.size)
+ def setOutputTag(self, tags): # List of output tag names (or None); setInputFile() pairs them with the input files in order.
+ self.outputTagNames = tags
+
+ def setDefaultValue(self, defaultValue): # Value setBins() uses for transcripts lacking the input tag; with None such transcripts raise.
+ self.defaultValue = defaultValue
- def initializeBins(self):
- self.binsPerStrand = {}
- self.sumsPerStrand = {}
- self.valuesPerStrand = {}
- self.toBePlottedPerStrand = {}
- for strand in self.strands:
- self.binsPerStrand[strand] = {}
- self.sumsPerStrand[strand] = {}
- self.valuesPerStrand[strand] = {}
- self.toBePlottedPerStrand[strand] = {}
- for chromosome in self.sizes:
- binRange = range(self.getBinsFromPos(self.sizes[chromosome])[-1] + 1)
- self.binsPerStrand[strand][chromosome] = dict([[i, 0] for i in binRange])
- self.sumsPerStrand[strand][chromosome] = dict([[i, 0.0] for i in binRange])
- self.valuesPerStrand[strand][chromosome] = dict([[i, []] for i in binRange])
- self.toBePlottedPerStrand[strand][chromosome] = dict([[i, 0] for i in binRange])
+    def checkOptions(self):  # Validate operation, window size and overlap before any input is read; raises Exception on a bad setting.
+        if getattr(self, "operation", None) is not None and self.operation not in ("sum", "avg", "med", "min", "max"):
+            raise Exception("Do not understand operation '%s'! Aborting..." % (self.operation))  # message used to say "tag"
+        if getattr(self, "size", None) is None or self.size <= 0:
+            raise Exception("Please mention a windows size! Aborting...")
+        if getattr(self, "overlap", None) is None or not 0 <= self.overlap < self.size:  # overlap >= size gives a zero or negative window step
+            raise Exception("Please mention a windows overlap size (at least 0, smaller than the windows size)! Aborting...")
- def getNbElements(self, transcript):
- nbOccurrences = 1 if "nbOccurrences" not in transcript.getTagNames() else transcript.getTagValue("nbOccurrences")
- nbElements = 1 if "nbElements" not in transcript.getTagNames() else transcript.getTagValue("nbElements")
- nbOccurrences = float(nbOccurrences)
- nbElements = float(nbElements)
- nbElements /= float(nbOccurrences)
- return nbElements
+    def getBinsFromPos(self, pos):  # Return the window index for 1-based pos, preceded by the previous index when pos lies in the overlap with it.
+        step = self.size - self.overlap  # distance between the starts of two consecutive windows
+        index = (pos - 1) // step  # explicit floor division: the index stays an int even under true division
+        inOverlap = index >= 1 and pos <= index * step + self.overlap  # pos is also covered by the end of window index - 1
+        return (index - 1, index) if inOverlap else (index, )
- def setBins(self):
- progress = Progress(self.parser.getNbTranscripts(), "Setting bins", self.verbosity)
- for transcript in self.parser.getIterator():
- nbElements = self.getNbElements(transcript)
- strand = transcript.getDirection() if len(self.strands) == 2 else 0
- for bin in self.getBinsFromPos(transcript.getStart()):
- self.binsPerStrand[strand][transcript.getChromosome()][bin] += nbElements
- if self.tag != None:
- if self.tag not in transcript.getTagNames():
- if self.defaultValue is None:
- raise Exception("Tag %s undefined in transcript %s" % (self.tag, transcript))
- value = self.defaultValue
- else:
- value = float(transcript.getTagValue(self.tag))
- self.sumsPerStrand[strand][transcript.getChromosome()][bin] += value
- self.valuesPerStrand[strand][transcript.getChromosome()][bin].append(value)
- progress.inc()
- progress.done()
+ def getPosFromBin(self, bin): # Return the (start, end) coordinates of window number bin; window 0 starts at position 1 and spans self.size positions.
+ return (bin * (self.size - self.overlap) + 1, bin * (self.size - self.overlap) + self.size)
- def aggregateData(self):
- if self.operation == "sum":
- self.computeSumData()
- elif self.operation == "avg":
- self.computeAvgData()
- elif self.operation == "med":
- self.computeMedData()
- elif self.operation == "min":
- self.computeMinData()
- elif self.operation == "max":
- self.computeMaxData()
- elif self.operation == "GCpercent":
- self.computeGCPercent()
- else:
- self.toBePlottedPerStrand = self.binsPerStrand
-
- def computeSumData(self):
- self.toBePlottedPerStrand = self.sumsPerStrand
+    def getNbElements(self, transcript):  # Return the transcript's "nbElements" tag divided by its "nbOccurrences" tag, each defaulting to 1.
+        rawOccurrences = transcript.getTagValue("nbOccurrences") if "nbOccurrences" in transcript.getTagNames() else 1
+        rawElements = transcript.getTagValue("nbElements") if "nbElements" in transcript.getTagNames() else 1
+        occurrences = float(rawOccurrences)  # tag values may not be numeric objects yet, hence the conversion
+        elements = float(rawElements)
+        weight = elements / occurrences
+        return weight
- def computeAvgData(self):
- for strand in self.strands:
- for chromosome in self.binsPerStrand[strand]:
- for bin in self.binsPerStrand[strand][chromosome]:
- if self.binsPerStrand[strand][chromosome][bin] != 0:
- self.toBePlottedPerStrand[strand][chromosome][bin] = float(self.sumsPerStrand[strand][chromosome][bin]) / self.binsPerStrand[strand][chromosome][bin]
-
- def computeMedData(self):
- for strand in self.strands:
- for chromosome in self.binsPerStrand[strand]:
- for bin in self.binsPerStrand[strand][chromosome]:
- if self.valuesPerStrand[strand][chromosome][bin]:
- self.valuesPerStrand[strand][chromosome][bin].sort()
- size = len(self.valuesPerStrand[strand][chromosome][bin])
- if size % 2 == 1:
- self.toBePlottedPerStrand[strand][chromosome][bin] = self.valuesPerStrand[strand][chromosome][bin][(size - 1) / 2]
- else:
- self.toBePlottedPerStrand[strand][chromosome][bin] = (self.valuesPerStrand[strand][chromosome][bin][size / 2 - 1] + self.valuesPerStrand[strand][chromosome][bin][size / 2]) / 2.0
-
- def computeMinData(self):
- for strand in self.strands:
- for chromosome in self.binsPerStrand[strand]:
- for bin in self.binsPerStrand[strand][chromosome]:
- if self.valuesPerStrand[strand][chromosome][bin]:
- self.toBePlottedPerStrand[strand][chromosome][bin] = min(self.valuesPerStrand[strand][chromosome][bin])
+    def setBins(self, name, parser):  # Read every transcript from parser and append its value, under key name, to the window(s) holding its start.
+        progress = UnlimitedProgress(10000, "Setting bins of file %s" % (parser.fileName), self.verbosity)
+        for transcript in parser.getIterator():
+            nbElements = self.getNbElements(transcript)
+            strand = transcript.getDirection() if len(self.strands) == 2 else 0  # everything goes under key 0 unless both strands are requested
+            chromosome = transcript.getChromosome()
+            if strand not in self.valuesPerStrand:
+                self.valuesPerStrand[strand] = {}
+            if chromosome not in self.valuesPerStrand[strand]:
+                self.valuesPerStrand[strand][chromosome] = {}
+            for bin in self.getBinsFromPos(transcript.getStart()):
+                if self.tag is None:
+                    value = nbElements
+                else:
+                    if self.tag in transcript.getTagNames():  # was the bare name `tag`: a NameError as soon as an input tag was given
+                        value = float(transcript.getTagValue(self.tag))
+                    else:
+                        if self.defaultValue is None:
+                            raise Exception("Tag %s undefined in transcript %s" % (self.tag, transcript))
+                        value = self.defaultValue
+                if bin not in self.valuesPerStrand[strand][chromosome]:
+                    self.valuesPerStrand[strand][chromosome][bin] = {}
+                if name not in self.valuesPerStrand[strand][chromosome][bin]:
+                    self.valuesPerStrand[strand][chromosome][bin][name] = []
+                self.valuesPerStrand[strand][chromosome][bin][name].append(value)
+            progress.inc()
+        progress.done()
- def computeMaxData(self):
- for strand in self.strands:
- for chromosome in self.binsPerStrand[strand]:
- for bin in self.binsPerStrand[strand][chromosome]:
- if self.valuesPerStrand[strand][chromosome][bin]:
- self.toBePlottedPerStrand[strand][chromosome][bin] = max(self.valuesPerStrand[strand][chromosome][bin])
-
- def computeGCPercent(self):
- for strand in self.strands:
- for chromosome in self.binsPerStrand[strand]:
- for bin in self.binsPerStrand[strand][chromosome]:
- if self.valuesPerStrand[strand][chromosome][bin]:
- subSequence = self.valuesPerStrand[strand][chromosome][bin]
- NPercent = 100 * (subSequence.countNt("N") / float(subSequence.getSize()))
- if NPercent >= 50:
- currentGCpercent = "NA"
- else:
- currentGCpercent = subSequence.getGCpercentageInSequenceWithoutCountNInLength()
-
- self.toBePlottedPerStrand[strand][chromosome][bin] = currentGCpercent
- #TODO: see if a map method could be used for the various "compute" methods
- #return currentGCpercent, NPercent
-
- def plotData(self):
- if self.plot != None:
- for strand in self.strands:
- adjunct = ""
- if strand != 0:
- adjunct = "Strand%d" % (strand)
- for chromosome in self.toBePlottedPerStrand[strand]:
- if len(self.toBePlottedPerStrand[strand][chromosome].keys()) > 0:
- plotter = RPlotter(self.plot, self.verbosity)
- plotter.setFill(0)
- plotter.addLine(self.toBePlottedPerStrand[strand][chromosome], chromosome)
- plotter.plot()
+    def aggregateData(self, values):  # Reduce the values collected for one window to a single figure, according to self.operation.
+        if self.operation is None or self.operation == "sum":
+            return sum(values)
+        if self.operation == "avg":
+            return sum(values) / float(len(values))
+        if self.operation == "med":
+            size = len(values)
+            sortedValues = sorted(values)
+            if size % 2 == 1:
+                return sortedValues[(size - 1) // 2]  # used to subscript the builtin `sorted`: a TypeError on odd-sized lists
+            return (sortedValues[size // 2 - 1] + sortedValues[size // 2]) / 2.0
+        if self.operation == "min":
+            return min(values)
+        if self.operation == "max":
+            return max(values)
+        if self.operation == "GCpercent":  # NOTE(review): checkOptions() rejects this operation, so this branch looks unreachable from run()
+            subSequence = values  # used to be fetched through undefined names (NameError); assumes values is a sequence object here - TODO confirm
+            NPercent = 100 * (subSequence.countNt("N") / float(subSequence.getSize()))
+            if NPercent >= 50:
+                return "NA"
+            return subSequence.getGCpercentageInSequenceWithoutCountNInLength()
- def writeExcel(self):
- if self.excel != None:
- excelFile = open(self.excel, "w")
- for strand in self.strands:
- maxBin = max([max(self.toBePlottedPerStrand[strand][chromosome].keys()) for chromosome in self.binsPerStrand[strand]])
- for bin in range(0, maxBin + 1):
- excelFile.write(",%d-%d" % self.getPosFromBin(bin))
- excelFile.write("\n")
- for chromosome in self.toBePlottedPerStrand[strand]:
- excelFile.write("%s" % (chromosome))
- for bin in self.toBePlottedPerStrand[strand][chromosome]:
- excelFile.write(",%f" % (self.toBePlottedPerStrand[strand][chromosome][bin]))
- excelFile.write("\n")
- excelFile.close()
+    def printRegions(self):  # Write one transcript per window that received data, tagged with each input's aggregated value, then close the writer.
+        cpt = 1  # running counter used to name the regions
+        for strand in self.strands:
+            for chromosome in sorted(self.valuesPerStrand.get(strand, {})):  # a requested strand may have received no transcript (was a KeyError)
+                for bin in sorted(self.valuesPerStrand[strand][chromosome]):
+                    transcript = Transcript()
+                    transcript.setName("region%d" % cpt)
+                    transcript.setChromosome(chromosome)
+                    transcript.setStart(self.getPosFromBin(bin)[0])
+                    transcript.setEnd(self.getPosFromBin(bin)[1])
+                    transcript.setDirection(1 if strand == 0 else strand)  # merged pseudo-strand 0 is written as direction 1
+                    for name in self.valuesPerStrand[strand][chromosome][bin]:
+                        transcript.setTagValue(name, self.aggregateData(self.valuesPerStrand[strand][chromosome][bin][name]))
+                    self.writer.addTranscript(transcript)
+                    cpt += 1
+        self.writer.close()
- def printRegions(self):
- cpt = 1
- tagOp = "nb"
- tagName = "Elements"
- outputTagName = "nbElements"
- if self.operation != None:
- tagOp = self.operation.lower()
- if self.tag != None:
- tagName = self.tag.title()
- if self.outputTagName != None:
- outputTagName = self.outputTagName
-
-
- #writer = Gff3Writer(self.outputFileName, self.verbosity)
-
- for strand in self.strands:
- for chromosome in self.toBePlottedPerStrand[strand]:
- for bin in self.toBePlottedPerStrand[strand][chromosome]:
- transcript = Transcript()
- transcript.setName("region%d" % cpt)
- transcript.setChromosome(chromosome)
- transcript.setStart(self.getPosFromBin(bin)[0])
- transcript.setEnd(self.getPosFromBin(bin)[1])
- transcript.setDirection(1 if strand == 0 else strand)
- transcript.setTagValue(outputTagName, self.binsPerStrand[strand][chromosome][bin])
- transcript.setTagValue("%s%s" % (tagOp, tagName), str(self.toBePlottedPerStrand[strand][chromosome][bin]))
- self.writer.addTranscript(transcript)
- cpt += 1
- self.writer.close()
-
- def run(self):
- self.checkOptions()
- self.getChromosomeSizes()
- self.initializeBins()
- self.setBins()
- self.aggregateData()
- if self.excel:
- self.writeExcel()
- if self.plot:
- self.plotData()
- self.printRegions()
-
+    def run(self):  # Validate the settings, bin every input file, then write the resulting windows.
+        self.checkOptions()
+        for tagName, tagParser in self.parsers.iteritems():  # one parser per output tag name
+            self.setBins(tagName, tagParser)
+        self.printRegions()
if __name__ == "__main__":
-
- # parse command line
- description = "Clusterize by Sliding Windows v1.0.1: Produces a GFF3 file that clusters a list of transcripts using a sliding window. [Category: Sliding Windows]"
+
+ # parse command line
+ description = "Clusterize by Sliding Windows v1.0.2: Produces a GFF3 file that clusters a list of transcripts using a sliding window. [Category: Sliding Windows]"
- parser = OptionParser(description = description)
- parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
- parser.add_option("-f", "--inputFormat", dest="inputFormat", action="store", type="string", help="format of the input file [compulsory] [format: transcript file format]")
- parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
- parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store", default="gff", type="string", help="format of the output file [format: transcript file format]")
- parser.add_option("-s", "--size", dest="size", action="store", type="int", help="size of the regions [compulsory] [format: int]")
- parser.add_option("-e", "--overlap", dest="overlap", action="store", type="int", help="overlap between two consecutive regions [compulsory] [format: int]")
- parser.add_option("-m", "--normalize", dest="normalize", action="store_true", default=False, help="normalize the number of reads per cluster by the number of mappings per read [format: bool] [default: false]")
- parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="use a given tag as input (instead of summing number of features) [format: string]")
- parser.add_option("-r", "--operation", dest="operation", action="store", default=None, type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")
- parser.add_option("-d", "--defaultValue",dest="defaultValue", action="store", type="float", help="default value for input tag [format: float]")
- parser.add_option("-w", "--write", dest="writeTag", action="store", default=None, type="string", help="print the result in the given tag (default usually is 'nbElements') [format: string]")
- parser.add_option("-2", "--strands", dest="strands", action="store_true", default=False, help="consider the two strands separately [format: bool] [default: false]")
- parser.add_option("-p", "--plot", dest="plot", action="store", default=None, type="string", help="plot regions to the given file [format: output file in PNG format]")
- parser.add_option("-x", "--excel", dest="excel", action="store", default=None, type="string", help="write an Excel file to the given file [format: output file in Excel format]")
- parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int] [default: 1]")
- (options, args) = parser.parse_args()
+ parser = OptionParser(description = description) # Build the command-line parser; options declared without a default are None when omitted.
+ parser.add_option("-i", "--input", dest="inputFileName", action="store", type="string", help="input files, separated by commas [compulsory] [format: string]")
+ parser.add_option("-f", "--inputFormat", dest="inputFormat", action="store", type="string", help="format of the input file [compulsory] [format: transcript file format]")
+ parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in transcript format given by -u]")
+ parser.add_option("-u", "--outputFormat", dest="outputFormat", action="store", default="gff", type="string", help="format of the output file [format: transcript file format]")
+ parser.add_option("-s", "--size", dest="size", action="store", type="int", help="size of the regions [compulsory] [format: int]")
+ parser.add_option("-e", "--overlap", dest="overlap", action="store", type="int", help="overlap between two consecutive regions [compulsory] [format: int]")
+ parser.add_option("-g", "--tag", dest="tag", action="store", default=None, type="string", help="use a given tag as input (instead of summing number of features) [format: string]")
+ parser.add_option("-r", "--operation", dest="operation", action="store", default=None, type="string", help="combine tag value with given operation [format: choice (sum, avg, med, min, max)]")
+ parser.add_option("-d", "--defaultValue", dest="defaultValue", action="store", type="float", help="default value for input tag [format: float]")
+ parser.add_option("-w", "--write", dest="writeTag", action="store", default=None, type="string", help="print the result in the given tags, separated by commas (default usually is 'nbElements') [format: string]")
+ parser.add_option("-2", "--strands", dest="strands", action="store_true", default=False, help="consider the two strands separately [format: bool] [default: false]")
+ parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int] [default: 1]")
+ (options, args) = parser.parse_args()
- cbsw = ClusterizeBySlidingWindows(options.verbosity)
- cbsw.setInputFile(options.inputFileName, options.inputFormat)
- cbsw.setOutputFileName(options.outputFileName, options.outputFormat)
- cbsw.setWindowSize(options.size)
- cbsw.setWindowOverlap(options.overlap)
- cbsw.setTag(options.tag)
- cbsw.setDefaultValue(options.defaultValue)
- cbsw.setOperation(options.operation)
- cbsw.setOutputTag(options.writeTag)
- cbsw.setBothStrands(options.strands)
- cbsw.setPlot(options.plot)
- cbsw.setExcel(options.excel)
- cbsw.run()
+ cbsw = ClusterizeBySlidingWindows(options.verbosity)
+ cbsw.setOperation(options.operation) # operation and output tags are set before setInputFile(), which reads both to name the per-file tags
+ cbsw.setTag(options.tag)
+ cbsw.setOutputTag(None if options.writeTag is None else options.writeTag.split(","))
+ cbsw.setInputFile(options.inputFileName.split(","), options.inputFormat) # -i carries a comma-separated list of input files
+ cbsw.setOutputFileName(options.outputFileName, options.outputFormat)
+ cbsw.setWindowSize(options.size)
+ cbsw.setWindowOverlap(options.overlap)
+ cbsw.setDefaultValue(options.defaultValue)
+ cbsw.setBothStrands(options.strands)
+ cbsw.run()
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/misc/Progress.pyc
Binary file SMART/Java/Python/misc/Progress.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/misc/RPlotter.pyc
Binary file SMART/Java/Python/misc/RPlotter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/misc/UnlimitedProgress.pyc
Binary file SMART/Java/Python/misc/UnlimitedProgress.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/misc/Utils.pyc
Binary file SMART/Java/Python/misc/Utils.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/misc/__init__.pyc
Binary file SMART/Java/Python/misc/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/mySql/MySqlExonTable.pyc
Binary file SMART/Java/Python/mySql/MySqlExonTable.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/mySql/MySqlTable.pyc
Binary file SMART/Java/Python/mySql/MySqlTable.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/mySql/MySqlTranscriptTable.pyc
Binary file SMART/Java/Python/mySql/MySqlTranscriptTable.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/mySql/__init__.pyc
Binary file SMART/Java/Python/mySql/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/ncList/NCIndex.pyc
Binary file SMART/Java/Python/ncList/NCIndex.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/ncList/NCList.pyc
Binary file SMART/Java/Python/ncList/NCList.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/ncList/NCListCursor.pyc
Binary file SMART/Java/Python/ncList/NCListCursor.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/ncList/NCListFilePickle.pyc
Binary file SMART/Java/Python/ncList/NCListFilePickle.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/ncList/__init__.pyc
Binary file SMART/Java/Python/ncList/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/Bins.pyc
Binary file SMART/Java/Python/structure/Bins.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/Interval.pyc
Binary file SMART/Java/Python/structure/Interval.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/Mapping.pyc
Binary file SMART/Java/Python/structure/Mapping.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/Sequence.pyc
Binary file SMART/Java/Python/structure/Sequence.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/SequenceList.pyc
Binary file SMART/Java/Python/structure/SequenceList.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/SubMapping.pyc
Binary file SMART/Java/Python/structure/SubMapping.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/Transcript.pyc
Binary file SMART/Java/Python/structure/Transcript.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/TranscriptContainer.pyc
Binary file SMART/Java/Python/structure/TranscriptContainer.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/TranscriptList.pyc
Binary file SMART/Java/Python/structure/TranscriptList.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/Python/structure/__init__.pyc
Binary file SMART/Java/Python/structure/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/Java/__init__.pyc
Binary file SMART/Java/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/__init__.pyc
Binary file SMART/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/galaxy/.clusterizeBySlidingWindows.xml.swp
Binary file SMART/galaxy/.clusterizeBySlidingWindows.xml.swp has changed
diff -r ff6607328942 -r 23ace8a3e22c SMART/galaxy/.nfs0000000009a6075300000027
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/.nfs0000000009a6075300000027 Thu Jan 30 08:54:40 2014 -0500
@@ -0,0 +1,137 @@
+
+ Produces a GFF3 file that clusters a list of transcripts using a sliding window. Cluster the data into regions (defined by size and overlap with next region).
+
+ PYTHONPATH
+
+
+ ../Java/Python/clusterizeBySlidingWindows.py -i $formatType.inputFileName
+ #if $formatType.FormatInputFileName == 'bed':
+ -f bed
+ #elif $formatType.FormatInputFileName == 'gff':
+ -f gff
+ #elif $formatType.FormatInputFileName == 'gff2':
+ -f gff2
+ #elif $formatType.FormatInputFileName == 'gff3':
+ -f gff3
+ #elif $formatType.FormatInputFileName == 'sam':
+ -f sam
+ #elif $formatType.FormatInputFileName == 'gtf':
+ -f gtf
+ #end if
+ -s $size
+ -e $overlap
+ -o $outputFileGff
+ $strands
+
+ #if $OptionTag.tag == "Yes":
+ -g $OptionTag.value
+ #end if
+
+ #if $OptionsOperation.operation == "Yes":
+ -r $OptionsOperation.value
+ #end if
+
+ #if $OptionWriteTag.writeTag == "Yes":
+ -w $OptionWriteTag.value
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Sliding windows are a convenient way to clusterize data mapped on the genome. There are two important parameters of a sliding window: the size of the window and the size of the overlap.
+
+By default, sliding windows count the number of reads in each window. However, you can basically merge any information which is contained in the tags. You can compute the average, sum, median, max or min of the tags for each window. For instance, every window can contain the average cluster size, if you merge clusters instead of reads.
+
+The output file is a GFF3 file, where each element is a window. There is a special tag for each window, whose name is **nbElements** if you counted the number of transcripts per sliding window. However, if you performed a **min** (resp. **max**, **sum**, **median**, **average**) operation on the tags **value** of the transcripts, then the tag of the window will be **minValue** (resp. **maxValue**, **sumValue**, **medValue**, **avgValue**). You can also specify the name of your tag (which is actually advised: **nbReadsInSample1** will always be more informative than **nbElements**).
+
+
diff -r ff6607328942 -r 23ace8a3e22c SMART/galaxy/clusterizeBySlidingWindows.xml
--- a/SMART/galaxy/clusterizeBySlidingWindows.xml Fri Jan 10 09:04:03 2014 -0500
+++ b/SMART/galaxy/clusterizeBySlidingWindows.xml Thu Jan 30 08:54:40 2014 -0500
@@ -4,7 +4,8 @@
PYTHONPATH
- ../Java/Python/clusterizeBySlidingWindows.py -i $formatType.inputFileName
+ #set $inputFiles = ",".join(["%s" % (s) for s in $formatType.inputFileName])
+ ../Java/Python/clusterizeBySlidingWindows.py -i $inputFiles
#if $formatType.FormatInputFileName == 'bed':
-f bed
#elif $formatType.FormatInputFileName == 'gff':
@@ -21,7 +22,6 @@
-s $size
-e $overlap
-o $outputFileGff
- $normalize
$strands
#if $OptionTag.tag == "Yes":
@@ -33,14 +33,10 @@
#end if
#if $OptionWriteTag.writeTag == "Yes":
- -w $OptionWriteTag.value
+ #set $outputTags = ",".join(["%s" % (t["value"]) for t in $OptionWriteTag.writeTags])
+ -w $outputTags
#end if
- $strand
- $plot $plotPng
- $excel $excelOutput
-
-
@@ -52,31 +48,34 @@
+
-
+
-
+
-
+
-
+
-
+
-
+
+
+
+
-
@@ -91,7 +90,6 @@
-
@@ -110,14 +108,15 @@
-
-
+
-
+
+
+
@@ -132,10 +131,8 @@
Sliding windows are a convenient ways to clusterize data mapped on the genome. There are two important parameters of a sliding window: the size of the window and the size of the overlap.
-By default, sliding windows count the number of reads in each window. However, you can basically merge any information which is contained in the tags. You can compute the average, sum, median, max or min of the tags for each window. For instance, every window can contain the average cluster size, if you merge clusters instead of reads.
+By default, sliding windows count the number of reads in each window of each input file. However, you can merge any information which is contained in the tags. You can compute the average, sum, median, max or min of the tags for each window. For instance, every window can contain the average cluster size, if you merge clusters instead of reads.
The output file is a GFF3 file, where each element is a window. There is a special tag for each window, whose name is **nbElements** if you counted the number of transcripts per sliding window. However, if you performed a **min** (resp. **max**, **sum**, **median**, **average**) operation on the tags **value** of the transcripts, then the tag of the window will be **minValue** (resp. **maxValue**, **sumValue**, **medValue**, **avgValue**). You can also specify the name of your tag (which is actually advised: **nbReadsInSample1** will always be more informative than **nbElements**).
-
-You also have different option, which can select the *n* % highest regions, or the regions with at least *n* features in it, or even the regions with at least *n* unique features. This last option is useful when you want to cluster the reads which have mapped only once, for instance.
diff -r ff6607328942 -r 23ace8a3e22c commons/__init__.pyc
Binary file commons/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/__init__.pyc
Binary file commons/core/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/checker/RepetException.pyc
Binary file commons/core/checker/RepetException.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/checker/__init__.pyc
Binary file commons/core/checker/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/coord/Align.pyc
Binary file commons/core/coord/Align.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/coord/Map.pyc
Binary file commons/core/coord/Map.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/coord/Range.pyc
Binary file commons/core/coord/Range.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/coord/__init__.pyc
Binary file commons/core/coord/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/AxtParser.pyc
Binary file commons/core/parsing/AxtParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/BamParser.pyc
Binary file commons/core/parsing/BamParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/BedParser.pyc
Binary file commons/core/parsing/BedParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/BlastParser.pyc
Binary file commons/core/parsing/BlastParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/BowtieParser.pyc
Binary file commons/core/parsing/BowtieParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/CoordsParser.pyc
Binary file commons/core/parsing/CoordsParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/ElandParser.pyc
Binary file commons/core/parsing/ElandParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/ExoParser.pyc
Binary file commons/core/parsing/ExoParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/FastaParser.pyc
Binary file commons/core/parsing/FastaParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/FastqParser.pyc
Binary file commons/core/parsing/FastqParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/GffParser.pyc
Binary file commons/core/parsing/GffParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/GtfParser.pyc
Binary file commons/core/parsing/GtfParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/MapParser.pyc
Binary file commons/core/parsing/MapParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/MapperParser.pyc
Binary file commons/core/parsing/MapperParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/MaqParser.pyc
Binary file commons/core/parsing/MaqParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/NCListParser.pyc
Binary file commons/core/parsing/NCListParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/ParserChooser.pyc
Binary file commons/core/parsing/ParserChooser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/PklParser.pyc
Binary file commons/core/parsing/PklParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/PslParser.pyc
Binary file commons/core/parsing/PslParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/RmapParser.pyc
Binary file commons/core/parsing/RmapParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/SamParser.pyc
Binary file commons/core/parsing/SamParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/SeqmapParser.pyc
Binary file commons/core/parsing/SeqmapParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/SequenceListParser.pyc
Binary file commons/core/parsing/SequenceListParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/ShrimpParser.pyc
Binary file commons/core/parsing/ShrimpParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/Soap2Parser.pyc
Binary file commons/core/parsing/Soap2Parser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/SoapParser.pyc
Binary file commons/core/parsing/SoapParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/TranscriptListParser.pyc
Binary file commons/core/parsing/TranscriptListParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/WigParser.pyc
Binary file commons/core/parsing/WigParser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/parsing/__init__.pyc
Binary file commons/core/parsing/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/seq/Bioseq.pyc
Binary file commons/core/seq/Bioseq.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/seq/__init__.pyc
Binary file commons/core/seq/__init__.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/BedWriter.pyc
Binary file commons/core/writer/BedWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/CsvWriter.pyc
Binary file commons/core/writer/CsvWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/EmblWriter.pyc
Binary file commons/core/writer/EmblWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/FastaWriter.pyc
Binary file commons/core/writer/FastaWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/FastqWriter.pyc
Binary file commons/core/writer/FastqWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/GbWriter.pyc
Binary file commons/core/writer/GbWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/Gff2Writer.pyc
Binary file commons/core/writer/Gff2Writer.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/Gff3Writer.pyc
Binary file commons/core/writer/Gff3Writer.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/GtfWriter.pyc
Binary file commons/core/writer/GtfWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/MapWriter.pyc
Binary file commons/core/writer/MapWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/MySqlTranscriptWriter.pyc
Binary file commons/core/writer/MySqlTranscriptWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/SamWriter.pyc
Binary file commons/core/writer/SamWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/SequenceListWriter.pyc
Binary file commons/core/writer/SequenceListWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/TranscriptListWriter.pyc
Binary file commons/core/writer/TranscriptListWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/UcscWriter.pyc
Binary file commons/core/writer/UcscWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/WigWriter.pyc
Binary file commons/core/writer/WigWriter.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/WriterChooser.pyc
Binary file commons/core/writer/WriterChooser.pyc has changed
diff -r ff6607328942 -r 23ace8a3e22c commons/core/writer/__init__.pyc
Binary file commons/core/writer/__init__.pyc has changed