changeset 8:f9032a866675 draft

Deleted selected files
author drosofff
date Tue, 24 Jun 2014 10:58:53 -0400
parents f777cbc82f98
children 256a0b1aa836
files mississippi_gcc/FeaturesParser.py mississippi_gcc/FeaturesParser.xml mississippi_gcc/MDS_wrapper.py mississippi_gcc/MirDeepStar.xml mississippi_gcc/MirParser.py mississippi_gcc/MirParser.xml mississippi_gcc/annotation_collector.py mississippi_gcc/bowtie_indices.loc.sample mississippi_gcc/piRNAsignature.py mississippi_gcc/piRNAsignature.xml mississippi_gcc/readmap.py mississippi_gcc/readmap.xml mississippi_gcc/sRbowtie.py mississippi_gcc/sRbowtie.xml mississippi_gcc/sRbowtieCascade.py mississippi_gcc/sRbowtieCascade.xml mississippi_gcc/sRbowtieParser.py mississippi_gcc/sRbowtieParser.xml mississippi_gcc/size_histogram.py mississippi_gcc/size_histogram.xml mississippi_gcc/smRtools.py mississippi_gcc/test-data/sRbowtie.fa mississippi_gcc/test-data/sRbowtie.out mississippi_gcc/test-data/yac.fastq mississippi_gcc/test-data/yac.out mississippi_gcc/tool_data_table_conf.xml.sample mississippi_gcc/yac.py mississippi_gcc/yac.xml
diffstat 28 files changed, 0 insertions(+), 3131 deletions(-)
--- a/mississippi_gcc/FeaturesParser.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-#!/usr/bin/python
-# python parser module to analyse Features in sRbowtie alignments (guided by a GFF3 file)
-# version 0.9
-# Usage FeaturesParser.py  <1:index source> <2:extraction directive> <3:output> <4:GFF3 guide file> <5:6:7 filePath:FileExt:FileLabel> <.. ad  lib>
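-# e.g. (hypothetical file names): FeaturesParser.py genome.fa --do_not_extract_index feature_counts.tab features.gff3 sample1.tab tabular sample1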
-
-import sys
-from smRtools import *
-from collections import *
-
-IndexSource = sys.argv[1]
-ExtractionDirective = sys.argv[2]
-if ExtractionDirective == "--do_not_extract_index":
-  genomeRefFormat = "fastaSource"
-elif  ExtractionDirective == "--extract_index":
-  genomeRefFormat = "bowtieIndex"
-Output = sys.argv[3]
-GFF3_file = sys.argv[4]
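-# group the remaining arguments into [filePath, FileExt, FileLabel] triplets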
-Triplets = [sys.argv[5:][i:i+3] for i in xrange(0, len(sys.argv[5:]), 3)]
-MasterListOfGenomes = {}
-FeatureDict = defaultdict(dict)
-
-for [filePath, FileExt, FileLabel] in Triplets:
-  MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (filePath, FileExt, IndexSource, genomeRefFormat) 
-  FeatureDict[FileLabel] = MasterListOfGenomes[FileLabel].CountFeatures(GFF3=GFF3_file)
-
-# pick up the GFF3 features in their order of appearance.
-F = open(GFF3_file, "r")
-featureList = []
-for line in F:
-  if line[0] == "#": continue
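-  # column 3 of a GFF3 line (index 2 after split) holds the feature type, e.g. "gene" or "miRNA"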
-  feature = line.split()[2]
-  if feature not in featureList:
-    featureList.append(feature)
-F.close()
-
-header = ["#Feature"]
-for [filePath, FileExt, FileLabel] in Triplets:
-  header.append(FileLabel)
-
-F = open (sys.argv[3], "w")
-print >> F, "\t".join(header)
-for feature in  featureList:
-  line=[feature]
-  for sample in header[1:]:
-    count = str (FeatureDict[sample][feature])
-# uncomment to get percentage in addition to counts
-#    percent = float(FeatureDict[sample][feature]) / MasterListOfGenomes[sample].alignedReads
-#    value = "%s | %0.2f" % (count, percent)
-#    line.append(value)
-    line.append(count)
-  print >> F,  "\t".join(line )
-line = ["Unfeatured"]
-for sample in header[1:]:
-  matched = 0
-  for feature in FeatureDict[sample]:
-    matched += FeatureDict[sample][feature]
-  unmatched = MasterListOfGenomes[sample].alignedReads - matched
-# uncomment to get percentage in addition to counts
-#  percent = float (unmatched) / (matched + unmatched)
-#  value = "%s | %0.2f" % (unmatched, percent)
-#  line.append(value)
-  line.append("%s" % unmatched)
-print >> F,  "\t".join(line)
-F.close()
--- a/mississippi_gcc/FeaturesParser.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-<tool id="FeaturesParser" name="Parse GFF3 features" version="0.9.1">
-  <description>in sRbowtie alignment</description>
-  <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
-  <parallelism method="basic"></parallelism>
-<command interpreter="python">
-        FeaturesParser.py 
-	          #if $refGenomeSource.genomeSource == "history":
-                    $refGenomeSource.ownFile ## index source  ## 1
-         	    --do_not_extract_index  ## 2
-          	  #else:
-		    #silent reference= filter( lambda x: str( x[0] ) == str( $input_list.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
-            	    $reference   ## index source ## 1
-            	    --extract_index  ## 2
-          	  #end if
-		  $output ## 3
-		  $gff3 ## 4
-		  #for $i in $refGenomeSource.input_list
-    		    $i $i.ext "$i.name"
-		  #end for
-
-</command>
-  <inputs>
-       <conditional name="refGenomeSource">
-           <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-               <option value="indexed">Use a built-in index</option>
-               <option value="history">Use one from the history</option>
-           </param>
-           <when value="indexed">
-               <param name="input_list" type="data" label="Select multiple alignments to parse" multiple="true">
-                  <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
-               </param>
-           </when>
-           <when value="history">
-                <param name="ownFile" type="data" format="fasta"  label="Select the fasta reference" />
-	        <param name="input_list" type="data" label="Select multiple alignments to parse" multiple="true"/>
-           </when>
-       </conditional>  <!-- refGenomeSource -->
-       <param name="gff3" type="data" format="gff3" label="A GFF3 with features' coordinates"/>
-   </inputs>
-   <outputs>
-   <data format="tabular" name="output" label="Feature Counts  Lists"/>
-   </outputs>
-  <help>
-
-**What it does**
-
-Parses feature counts from one or several sRbowtie alignments (in tabular, SAM or BAM format).
-
-Both sense and antisense alignments are counted.
-
-The library labels are inferred from the input dataset names in the Galaxy history.
-
-**It is thus essential that input datasets are appropriately renamed**
-
-**It is preferable not to put spaces in these input dataset names. You may edit the names in the history**
-
-
-
-  </help>
-</tool>
-
--- a/mississippi_gcc/MDS_wrapper.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-#!/usr/bin/env python
-# mirdeep* python wrapper
-# a python refactoring of the MDS_wrapper.pl script
-# Usage MDS_wrapper.py <fasta input> <MDS genome> <gff3_output> <data.result> <data.cluster>
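-# e.g. (hypothetical file names): MDS_wrapper.py clipped_reads.fa Dmel mir_predictions.gff3 data.result data.cluster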
-
-import sys, re, os, subprocess, shlex, tempfile, shutil
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-MDS_path = "/home/galaxy/bin/MDS_command_line_v32/MDS_command_line"
-
-input_full_path = sys.argv[1]
-MDS_genome = sys.argv[2]
-gff3_output = sys.argv[3]
-dataresult = sys.argv[4]
-datacluster = sys.argv[5]
-
-tmp_MDS_work_dir = tempfile.mkdtemp(dir = MDS_path) # make temp directory for MDS analysis
-os.symlink(input_full_path, tmp_MDS_work_dir+"/data.fa" ) # symlink between the fasta source file and the required "data.fa" input
-os.symlink(MDS_path+"/MDS_command_line.jar", tmp_MDS_work_dir+"/MDS_command_line.jar" ) # symlink to jar source in working directory
-os.symlink(MDS_path+"/genome", tmp_MDS_work_dir+"/genome")
-os.symlink(MDS_path+"/targetScan", tmp_MDS_work_dir+"/targetScan")
-os.symlink(MDS_path+"/targetScan_files", tmp_MDS_work_dir+"/targetScan_files")
-
-# execute MirDeep*
-command_line = "java -Xmx4g -jar " + MDS_path + "/MDS_command_line.jar -r 5 -g " + MDS_genome + " data.fa" # -Xmx12g
-print command_line
-#tmp = tempfile.NamedTemporaryFile( dir=tmp_MDS_work_dir ).name
-#tmp_stderr = open( tmp, 'wb' )
-
-try:
-  os.chdir(tmp_MDS_work_dir)
-  p = subprocess.Popen(args=command_line, cwd=tmp_MDS_work_dir, shell=True, stderr=sys.stderr)
-  returncode = p.wait()
-  shutil.copy2 ("data.result", dataresult)
-  shutil.copy2 ("data.cluster", datacluster)
-  dataFILE = open("data.result", "r")
-  datafile = dataFILE.readlines()
-  dataFILE.close()
-  GFF3OUT = open(gff3_output, "w")
-  print >> GFF3OUT,"##gff-version 3"
-  print >> GFF3OUT, "##Seqid	Source	Type	Start	End	Score	Strand	Phase	Attributes"
-  print >> GFF3OUT, "##"
-  for line in datafile[1:]:
-    fields = line.split("\t")
-    Seqid, Source, Type, Start, End, Score, Strand, Phase = fields[2], "MirDeep*", "hairPin_loci", fields[4].split("-")[0], fields[4].split("-")[1], fields[1], fields[3], "."
-    ID = "ID=%s;%s_reads;%s;%s;mature_seq:%s" % (fields[0],fields[5],fields[7],fields[8],fields[9])
-    print >> GFF3OUT, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (Seqid, Source, Type, Start, End, Score, Strand, Phase, ID)
-  GFF3OUT.close()
-  if os.path.exists( tmp_MDS_work_dir ):
-    shutil.rmtree( tmp_MDS_work_dir )
-  else:
-    print "Error in cleaning tmp working directory"
-
-except Exception, e:
-  # clean up temp dir
-  if os.path.exists( tmp_MDS_work_dir ):
-    shutil.rmtree( tmp_MDS_work_dir )
-  stop_err( 'Error running MDS_command_line.jar\n' + str( e ) )
-
-
--- a/mississippi_gcc/MirDeepStar.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-<tool id="MirDeepStar" name="MirDeepStar">
-<description></description>
-<command interpreter="python">MDS_wrapper.py $input $genome $output $output2 $output3</command>
-
-<inputs>
-        <param name="input" type="data" format="fasta" label="input fasta file"/>
-        <param name="genome" type="select" label="MirDeep* reference genome">
-                <option value="AgamP4">SoftMasked Anopheles Gambiae P4</option>
-                <option value="AgamF">SoftMasked Anopheles Gambiae P3</option>
-                <option value="hg19">Human (hg19)</option>
-                <option value="mm9">Mouse (mm9)</option>
-                <option value="Dmel">Dmel (5.49)</option>
-        </param>
-</inputs>
-
-<outputs>
-        <data name="output3" format="tabular" label="mirdeep* clusters"/>
-        <data name="output2" format="tabular" label="mirdeep* raw output"/>
-	<data name="output" format="gff3" label="mirdeep* GFF output"/>
-</outputs>
-
-<help>
-
-**What it does**
-
-MirDeep* wrapper
-
-
-</help>
-
-</tool>
-
--- a/mississippi_gcc/MirParser.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-#!/usr/bin/python
-# python parser module for pre-mir and mature miRNAs, guided by mirbase.org GFF3
-# version 0.0.9 (1-6-2014)
-# Usage MirParser.py  <1:index source> <2:extraction directive> <3:output pre-mir> <4: output mature miRs> <5:mirbase GFF3>
-#                     <6:pathToLatticeDataframe or "dummy_dataframe_path"> <7:Rcode or "dummy_plotCode"> <8:latticePDF or "dummy_latticePDF">
-#                     <9:10:11 filePath:FileExt:FileLabel> <.. ad  lib>
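-# e.g. (hypothetical file names): MirParser.py genome.fa --do_not_extract_index premirs.tab matures.tab dme.gff3 dummy_dataframe_path dummy_plotCode dummy_latticePDF sample1.tab tabular sample1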
-
-import sys, subprocess
-from smRtools import *
-
-IndexSource = sys.argv[1]
-ExtractionDirective = sys.argv[2]
-if ExtractionDirective == "--do_not_extract_index":
-  genomeRefFormat = "fastaSource"
-elif  ExtractionDirective == "--extract_index":
-  genomeRefFormat = "bowtieIndex"
-OutputPre_mirs = sys.argv[3]
-OutputMature_Mirs = sys.argv[4]
-GFF3_file = sys.argv[5]
-lattice = sys.argv[6]
-Rcode = sys.argv[7]
-latticePDF = sys.argv[8]
-Triplets = [sys.argv[9:][i:i+3] for i in xrange(0, len(sys.argv[9:]), 3)]
-MasterListOfGenomes = {}
-
-for [filePath, FileExt, FileLabel] in Triplets:
-  print FileLabel
-  MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=FileExt, genomeRefFile=IndexSource, genomeRefFormat=genomeRefFormat, biosample=FileLabel) 
-
-header = ["gene"]
-for [filePath, FileExt, FileLabel] in Triplets:
-  header.append(FileLabel)
-
-hit_table = ["\t".join(header)] # table header: gene, sample1, sample2, sample3, etc. separated by tabulation
-
-## read GFF3 to subinstantiate
-gff3 = open (GFF3_file, "r")
-lattice_dataframe = []
-for line in gff3:
-  if line[0] == "#": continue
-  gff_fields = line[:-1].split("\t")
-  chrom = gff_fields[0]
-  gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
-  item_upstream_coordinate = int(gff_fields[3])
-  item_downstream_coordinate = int(gff_fields[4])
-  if gff_fields[6] == "+":
-    item_polarity = "forward"
-  else:
-    item_polarity = "reverse"
-  item_line = [gff_name]
-  for sample in header[1:]:
-    count = MasterListOfGenomes[sample].instanceDict[chrom].readcount(upstream_coord=item_upstream_coordinate, downstream_coord=item_downstream_coordinate, polarity=item_polarity)
-    item_line.append(str(count))
-    ## subtreatment for lattice
-    if lattice != "dummy_dataframe_path":
-      if ("5p" not in gff_name) and  ("3p" not in gff_name):
-        lattice_dataframe.append(MasterListOfGenomes[sample].instanceDict[chrom].readcoverage(upstream_coord=item_upstream_coordinate, downstream_coord=item_downstream_coordinate, windowName=gff_name+"_"+sample) )
-    ## end of subtreatment for lattice
-  hit_table.append("\t".join(item_line) )
-gff3.close()
-
-Fpremirs = open (OutputPre_mirs, "w")
-print >> Fpremirs, hit_table[0]
-finalPreList = [ i for i in sorted(hit_table[1:]) if ("5p" not in i) and  ("3p" not in i)] 
-print >> Fpremirs, "\n".join(finalPreList )
-Fpremirs.close()
-
-Fmaturemires = open (OutputMature_Mirs, "w")
-print >> Fmaturemires, hit_table[0]
-finalMatureList = [ i for i in sorted(hit_table[1:]) if ("5p" in i) or ("3p" in i)]
-print >> Fmaturemires, "\n".join(finalMatureList )
-Fmaturemires.close()
-
-if lattice != "dummy_dataframe_path":
-  Flattice = open(lattice, "w")
-  print >> Flattice, "%s\t%s\t%s\t%s\t%s\t%s\t%s" % ("sample", "mir", "offset", "offsetNorm", "counts","countsNorm",  "polarity")
-  print >> Flattice, "\n".join(lattice_dataframe)
-  Flattice.close()
-  R_command="Rscript "+ Rcode
-  process = subprocess.Popen(R_command.split())
-  process.wait()
--- a/mississippi_gcc/MirParser.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,148 +0,0 @@
-<tool id="MirParser" name="Parse miRNAs" version="0.9.1">
-  <description>from sRbowtie alignment</description>
-  <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
-  <parallelism method="basic"></parallelism>
-<command interpreter="python">
-        MirParser.py 
-	          #if $refGenomeSource.genomeSource == "history":
-                    $refGenomeSource.ownFile ## index source sys.arg[1]
-         	    --do_not_extract_index  ## sys.argv[2]
-          	  #else:
-		    #silent reference= filter( lambda x: str( x[0] ) == str( $input_list.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
-		    $reference   ## sys.argv[1]
-		    --extract_index  ## sys.argv[2]
-          	  #end if
-		  $output1 ## for pre-mirs  ## sys.argv[3]
-		  $output2 ## for mature mirs  ## sys.argv[4]
-		  $GFF3    ## sys.argv[5]
-		  #if $plotting.plottingOption == "yes":
-		    $lattice_dataframe   ## sys.argv[6]
-		    $plotCode   ## sys.argv[7]
-		    $latticePDF ## sys.argv[8]
-		  #else:
-		    "dummy_dataframe_path"   ## sys.argv[6]
-                    "dummy_plotCode"   ## sys.argv[7]
-		    "dummy_latticePDF" ## sys.argv[8]
-		  #end if
-		  #for $i in $refGenomeSource.input_list
-    		    $i $i.ext "$i.name" ## sys.argv[9,10,11] modulo 3
-		  #end for
-                  #silent plottingoption = $plotting.plottingOption
-</command>
-  <inputs>
-       <conditional name="refGenomeSource">
-           <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-               <option value="indexed">Use a built-in index</option>
-               <option value="history">Use one from the history</option>
-           </param>
-           <when value="indexed">
-	       <param name="input_list" type="data" label="Select multiple alignments to parse" multiple="true">
-                  <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
-               </param>
-           </when>
-           <when value="history">
-	       <param name="input_list" type="data" label="Select multiple alignments to parse" multiple="true"/>
-               <param name="ownFile" type="data" format="fasta"  label="Select the fasta reference" />
-           </when>
-       </conditional>  <!-- refGenomeSource -->
-       <param name="GFF3" type="data" label="miRbase GFF3 guide" />
-       <conditional name="plotting">
-           <param name="plottingOption" type="select" label="Additional mir coverage graphs">
-               <option value="no" selected="True">No</option>
-               <option value="yes">YES</option>
-           </param>
-           <when value="yes">
-               <param name="display" type="select" label="Display Coverage with absolute number of reads or relatively to the total number of read matching the gene or mir">
-                 <option value="relative" selected="True">Relative Coverage</option>
-                 <option value="absolute">Absolute Coverage</option>
-               </param>
-           </when>
-       </conditional>
-   </inputs>
-   <configfiles>
-     <configfile name="plotCode">
-	#if  $plotting.plottingOption == "yes":
-          graph_type = "${plotting.display}" ## "relative" or "absolute"
-          ## Setup R error handling to go to stderr
-          options( show.error.messages=F,
-                 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-          library(lattice)
-          coverage = read.delim("${lattice_dataframe}", header=T)
-          Numb_of_biosamples = length(levels(coverage\$sample))
-          if (graph_type=="relative") {
-          graph = xyplot(countsNorm~offsetNorm | mir, data=coverage, groups=polarity, col=c("red", "blue"), type="l", lwd=1,
-                        scales=list(x=list(cex=.5), y=list(cex=.5)), par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), layout=c(Numb_of_biosamples,15), as.table=TRUE, main="miRNA coverage maps")
-          } else {
-          graph = xyplot(counts~offset | mir, data=coverage, groups=polarity, col=c("red", "blue"), type="l", lwd=1,
-                        scales=list(x=list(cex=.5), y=list(cex=.5)), par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), layout=c(Numb_of_biosamples,15), as.table=TRUE, main="miRNA coverage maps")
-          }
-          ## pdf output
-          pdf(file="${latticePDF}", paper="special", height=11.69, width=8.2677)
-          plot(graph, newpage = T)
-          dev.off()
-        #end if
-     </configfile>
-   </configfiles>
-
-   <outputs>
-   <data format="tabular" name="output1" label="Premirs Count  Lists"/>
-   <data format="tabular" name="output2" label="Mature Mirs Count  Lists"/>
-   <data format="tabular" name="lattice_dataframe" label="Lattice Dataframe">
-        <filter>plotting['plottingOption'] == "yes"</filter>
-   </data>
-   <data format="pdf" name="latticePDF" label="Mir coverage">
-        <filter>plotting['plottingOption'] == "yes"</filter>
-   </data>
-   </outputs>
-  <help>
-
-**What it does**
-
-This tool uses a species-specific GFF3 file from mirBase_ to guide the parsing of an alignment file produced with the sRbowtie tool.
-
-.. _mirBase: ftp://mirbase.org/pub/mirbase/CURRENT/genomes/
-
-------
-
-.. class:: warningmark
-
-The guide GFF3 file must be in the following format:
-
-2L	.	miRNA_primary_transcript	243035	243141	.	-	.	ID=MI0005821;Alias=MI0005821;Name=dme-mir-965
-
-2L	.	miRNA	243055	243076	.	-	.	ID=MIMAT0005480;Alias=MIMAT0005480;Name=dme-miR-965-3p;Derives_from=MI0005821
-
-2L	.	miRNA	243096	243118	.	-	.	ID=MIMAT0020861;Alias=MIMAT0020861;Name=dme-miR-965-5p;Derives_from=MI0005821
-
-2L	.	miRNA_primary_transcript	857542	857632	.	+	.	ID=MI0005813;Alias=MI0005813;Name=dme-mir-375
-
-2L	.	miRNA	857596	857617	.	+	.	ID=MIMAT0005472;Alias=MIMAT0005472;Name=dme-miR-375-3p;Derives_from=MI0005813
-
-2L	.	miRNA	857556	857579	.	+	.	ID=MIMAT0020853;Alias=MIMAT0020853;Name=dme-miR-375-5p;Derives_from=MI0005813
-
-2L	.	miRNA_primary_transcript	1831685	1831799	.	-	.	ID=MI0011290;Alias=MI0011290;Name=dme-mir-2280
-
-The Name attribute of mature miRNA lines (3rd column = miRNA) must contain either the -3p or the -5p string (Name=dme-miR-965-3p, for instance)
-
-------
-
-**Input formats**
-
-1. One or several alignment files generated with the sRbowtie tool and **renamed** according to the name of the biosample (avoid spaces in biosample labels)
-
-.. class:: warningmark
-
-Alignment datasets generated with sRbowtie must be renamed according to a biosample name
-
-2. A GFF3 file retrieved from mirBase_
-
-------
-
-**Outputs**
-
-Two count list files for counts of reads aligned to pre-mir or mature miRNA
-
-A PDF of pre-mir coverage plots. Red coverage indicates that the mir gene is on the genomic forward strand, blue coverage that it is on the reverse strand.
-
-  </help>
-</tool>
--- a/mississippi_gcc/annotation_collector.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-# By drosofff@gmail.com
-# command: annotation_collector.py $output input1, label1, input2, label2, etc...
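-# e.g. (hypothetical file names): annotation_collector.py summary.tab all_reads.fa "All reads" mirna_matched.fa miRNA "Annotation summary"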
-
-import sys, os
-
-def countlineinfile(file):
-  F = open (file, "r")
-  count = 0
-  for line in F:
-    count += 1
-  F.close()
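-  # each fasta record is assumed to span exactly two lines (header + sequence), so the sequence count is half the line count (Python 2 integer division)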
-  return count/2
-results = []
-
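-# pair the (file, label) arguments; the last argument is the table title, and percentages are computed relative to the first file's count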
-for file, label in zip (sys.argv[2:-1:2], sys.argv[3:-1:2]):
-  results.append ( (countlineinfile(file), label) )
-  
-Fout = open (sys.argv[1], "w")
-
-print >> Fout, "# %s" % (sys.argv[-1])
-for filecount, label in results:
-  print >> Fout, "%s\t%s\t%.2f %%" % (label, filecount, filecount/float(results[0][0])*100 )
-print >> Fout  
-Fout.close()
--- a/mississippi_gcc/bowtie_indices.loc.sample	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Bowtie indexed sequences data files. You will
-#need to create these data files and then create a bowtie_indices.loc
-#file similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The bowtie_indices.loc
-#file has this format (longer white space characters are TAB characters):
-#
-#<unique_build_id>   <dbkey>   <display_name>   <file_base_path>
-#
-#So, for example, if you had hg18 indexed stored in
-#/depot/data2/galaxy/bowtie/hg18/,
-#then the bowtie_indices.loc entry would look like this:
-#
-#hg18	hg18	hg18	/depot/data2/galaxy/bowtie/hg18/hg18
-#
-#and your /depot/data2/galaxy/bowtie/hg18/ directory
-#would contain hg18.*.ebwt files:
-#
-#-rw-r--r--  1 james    universe 830134 2005-09-13 10:12 hg18.1.ebwt
-#-rw-r--r--  1 james    universe 527388 2005-09-13 10:12 hg18.2.ebwt
-#-rw-r--r--  1 james    universe 269808 2005-09-13 10:12 hg18.3.ebwt
-#...etc...
-#
-#Your bowtie_indices.loc file should include an entry per line for each
-#index set you have stored. The "file" in the path does not actually
-#exist, but it is the prefix for the actual index files. For example:
-#
-#hg18canon			hg18	hg18 Canonical	/depot/data2/galaxy/bowtie/hg18/hg18canon
-#hg18full			hg18	hg18 Full		/depot/data2/galaxy/bowtie/hg18/hg18full
-#/orig/path/hg19	hg19	hg19			/depot/data2/galaxy/bowtie/hg19/hg19
-#...etc...
-#
-#Note that for backwards compatibility with workflows, the unique ID of
-#an entry must be the path that was in the original loc file, because that
-#is the value stored in the workflow for that parameter. That is why the
-#hg19 entry above looks odd. New genomes can be better-looking.
-#
--- a/mississippi_gcc/piRNAsignature.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#!/usr/bin/python
-# script for computing overlap signatures from a bowtie output
-# Christophe Antoniewski <drosofff@gmail.com>
-# Usage piRNAsignature.py <1:input> <2:format of input> <3:minsize query> <4:maxsize query> <5:minsize target> <6:maxsize target>
-#			  <7:minscope> <8:maxscope> <9:output> <10:bowtie index> <11:procedure option> <12: graph (global or lattice)>
-#			  <13: R code>
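-# e.g. (hypothetical file names): piRNAsignature.py bowtie_output.tab tabular 23 29 23 29 1 26 signature.tab genome.fa --do_not_extract_index global plotCode.R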
-
-import sys, subprocess
-from smRtools import *
-from collections import defaultdict # test whether it is required
-
-if sys.argv[11] == "--extract_index":
-  if sys.argv[2] == "tabular":
-    Genome = HandleSmRNAwindows (sys.argv[1],"tabular",sys.argv[10],"bowtieIndex")
-  elif sys.argv[2] == "sam":
-    Genome = HandleSmRNAwindows (sys.argv[1],"sam",sys.argv[10],"bowtieIndex")
-  else:
-    Genome = HandleSmRNAwindows (sys.argv[1],"bam",sys.argv[10],"bowtieIndex")
-else:
-  if sys.argv[2] == "tabular":
-    Genome = HandleSmRNAwindows (sys.argv[1],"tabular",sys.argv[10],"fastaSource") 
-  elif sys.argv[2] == "sam":
-    Genome = HandleSmRNAwindows (sys.argv[1],"sam",sys.argv[10],"fastaSource")
-  else:
-    Genome = HandleSmRNAwindows (sys.argv[1],"bam",sys.argv[10],"fastaSource")
-# this decision tree may be simplified if sam and bam inputs are treated the same way by pysam
-
-# replace objDic by Genome.instanceDict or... objDic = Genome.instanceDict
-objDic = Genome.instanceDict
-
-minquery = int(sys.argv[3])
-maxquery = int(sys.argv[4])
-mintarget = int(sys.argv[5])
-maxtarget = int(sys.argv[6])
-minscope = int(sys.argv[7])
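-# add 1 below so that range(minscope, maxscope) includes the requested maximal overlap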
-maxscope = int(sys.argv[8]) + 1
-general_frequency_table = dict ([(i,0) for i in range(minscope,maxscope)])
-general_percent_table = dict ([(i,0) for i in range(minscope,maxscope)])
-OUT = open (sys.argv[9], "w")
-
-if sys.argv[12] == "global":
-  ###### for normalized summing of local_percent_table(s)
-  readcount_dic = {}
-  Total_read_in_objDic = 0
-  for item in objDic:
-    readcount_dic[item] = objDic[item].readcount(minquery, maxquery)
-    Total_read_in_objDic += readcount_dic[item]
-  ######
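-  # each item's ping-pong probability profile is later weighted by its share of total query reads (readcount_dic[x] / Total_read_in_objDic), giving a read-weighted global average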
-  for x in (objDic):
-    local_frequency_table = objDic[x].signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) )
-    local_percent_table = objDic[x].hannon_signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) )
-    try:
-      for overlap in local_frequency_table.keys():
-        general_frequency_table[overlap] = general_frequency_table.get(overlap, 0) + local_frequency_table[overlap]
-    except:
-      pass
-    try:
-      for overlap in local_percent_table.keys():
-        general_percent_table[overlap] = general_percent_table.get(overlap, 0) + (1./Total_read_in_objDic*readcount_dic[x]*local_percent_table[overlap])
-    except:
-      pass
-  print >> OUT, "overlap\tnum of pairs\tprobability"
-  for classe in sorted(general_frequency_table):
-    print >> OUT, "%i\t%i\t%f" % (classe, general_frequency_table[classe], general_percent_table[classe])
-
-else:
-  print >> OUT, "overlap\tnum of pairs\tprobability\titem"
-  for x in (objDic):
-    local_frequency_table = objDic[x].signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) )
-    local_percent_table = objDic[x].hannon_signature( minquery, maxquery, mintarget, maxtarget, range(minscope,maxscope) )
-    for classe in range(minscope,maxscope):
-      print >> OUT, "%i\t%i\t%f\t%s" % (classe, local_frequency_table[classe], local_percent_table[classe], x)
-
-OUT.close()
-
-## Run the R script that is defined in the xml using the Rscript binary provided with R.
-R_command="Rscript "+ sys.argv[13]
-process = subprocess.Popen(R_command.split())
--- a/mississippi_gcc/piRNAsignature.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,122 +0,0 @@
-<tool id="piRNAsignature" name="piRNA Signatures" version="1.0.1">
-	<description></description>
-	<command interpreter="python">
-          piRNAsignature.py $refGenomeSource.input $refGenomeSource.input.ext $minquery $maxquery $mintarget $maxtarget $minscope $maxscope $output
-          #if $refGenomeSource.genomeSource == "history":
-            $refGenomeSource.ownFile
-            --do_not_extract_index
-          #else:
-            #silent reference= filter( lambda x: str( x[0] ) == str( $input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
-            $reference
-            --extract_index
-          #end if
-	  $graph_type 
-          $sigplotter
-       </command>
-
-	<inputs>
-          <conditional name="refGenomeSource">
-             <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-               <option value="indexed">Use a built-in index</option>
-               <option value="history">Use one from the history</option>
-             </param>
-	     <when value="indexed">
-  	        <param name="input" type="data" format="tabular,sam,bam" label="Compute signature from this bowtie standard output">
-		  <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
-	        </param>
-	     </when>
-             <when value="history">
-                <param name="ownFile" type="data" format="fasta"  label="Select the fasta reference" />
-  	        <param name="input" type="data" format="tabular,sam,bam" label="Compute signature from this bowtie standard output"/>
-             </when>
-          </conditional>  <!-- refGenomeSource -->
-		<param name="minquery" type="integer" size="3" value="23" label="Min size of query small RNAs" help="'23' = 23 nucleotides"/>
-		<param name="maxquery" type="integer" size="3" value="29" label="Max size of query small RNAs" help="'29' = 29 nucleotides"/>
-                <param name="mintarget" type="integer" size="3" value="23" label="Min size of target small RNAs" help="'23' = 23 nucleotides"/>
-                <param name="maxtarget" type="integer" size="3" value="29" label="Max size of target small RNAs" help="'29' = 29 nucleotides"/>
-                <param name="minscope" type="integer" size="3" value="1" label="Minimal relative overlap analyzed" help="'1' = 1 nucleotide overlap"/>
-                <param name="maxscope" type="integer" size="3" value="26" label="Maximal relative overlap analyzed" help="'1' = 1 nucleotide overlap"/>
-		<param name="graph_type" type="select" label="Graph type" help="Signature can be computed globally or by item present in the alignment file">
-		  <option value="global" selected="True">Global</option>
-                  <option value="lattice">Lattice</option>
-		</param>
-	</inputs>
-
-  <configfiles>
-    <configfile name="sigplotter">
-      graph_type = "${graph_type}"
-
-      globalgraph = function () {
-        ## Setup R error handling to go to stderr
-        options( show.error.messages=F,
-                 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-        signature = read.delim("${output}", header=TRUE)
-        ## Open output1 PDF file
-        pdf( "${output1}" )
-        signaturez=(signature[,2] -mean(signature[,2]))/sd(signature[,2])
-        plot(signaturez, type = "b", main="signature", cex.main=2, xlab="overlap (nt)", ylab="z-score", pch=19, cex=0.8, col="darkslateblue", lwd=3, cex.lab=1.5, cex.axis=1.4, xaxt="n")
-        axis(1, at=seq(from=1, to=length(signature[,1]), by=3) )
-        devname = dev.off()
-        ## Close the PDF file
-        ## Open output2 PDF file
-        pdf( "${output2}" )
-        YLIM=max(signature[,2])
-        plot(signature[,1:2], type = "h", xlab="overlap (nt)", ylim=c(0,YLIM), ylab="number of pairs", col="darkslateblue", lwd=7)
-        devname = dev.off()
-        ## Close the PDF file
-        ## Open output3 PDF file
-        pdf( "${output3}" )
-        plot(signature[,1], signature[,3]*100, type = "l", main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",
-             cex.main=1, xlab="overlap (nt)", ylab="ping-pong signal [%]", ylim=c(0,50),
-             pch=19, col="darkslateblue", lwd =4, cex.lab=1.2, cex.axis=1, xaxt="n")
-        axis(1, at=seq(from=1, to=length(signature[,1]), by=3) )
-        devname = dev.off()
-        ## Close the PDF file
-      }
-
-      treillisgraph = function () {
-        ## Open output3 PDF file
-        pdf( "${output3}", paper="special", height=11.69, width=8.2677 )
-        signature = read.delim("${output}", header=TRUE)
-        options( show.error.messages=F,
-               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-        library(lattice)
-        print (xyplot(signature[,3]*100~signature[,1]|signature[,4], type = "l", xlim=c(1,26), main="ping-pong Signature of ${minquery}-${maxquery} against ${mintarget}-${maxtarget}nt small RNAs",
-             par.strip.text=list(cex=.5), strip=strip.custom(which.given=1, bg="lightblue"), scales=list(cex=0.5),
-             cex.main=1, cex=.5, xlab="overlap (nt)", ylab="ping-pong signal [%]",
-             pch=19, col="darkslateblue", lwd =1.5, cex.lab=1.2, cex.axis=1.2,
-             layout=c(4,12), as.table=TRUE, newpage = T) )
-        devnname = dev.off()
-      }
-
-      if (graph_type=="global") {
-	globalgraph()
-
-      }
-      if(graph_type=="lattice") {
-        treillisgraph()
-      }
-    </configfile>
-  </configfiles>
-
-        <outputs>
-                <data name="output" format="tabular" label = "signature data frame"/>
-                <data name="output2" format="pdf" label="number of pairs signature">
-                <filter>(graph_type == "global")</filter>
-                </data>
-                <data name="output1" format="pdf" label="z-score signature">
-                <filter>(graph_type == "global")</filter>
-                </data>
-                <data name="output3" format="pdf" label="ping-pong signal (hannon algo.)"/>
-        </outputs>
-
-        <help>
-
-**What it does**
-
-This tool computes, from a bowtie output file, the number of read pairs per overlap class (in nt), the z-score calculated from these numbers of pairs, and the ping-pong signal as described in Brennecke et al. (2009) Science.
-The numerical options set the min and max sizes of both the query and the target small RNA classes.
-The three types of signal are plotted in separate pdf files: the number of pairs found, the z-score calculated from these numbers of pairs, and the ping-pong signal.
-
-        </help>
-</tool>
--- a/mississippi_gcc/readmap.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,116 +0,0 @@
-#!/usr/bin/python
-# python parser module for readmaps and size distributions, guided by GFF3
-# version 0.9.1 (1-6-2014)
-# Usage: readmap.py --output_readmap <file> --output_size_distribution <file>
-#                   (--reference_fasta <fasta> | --reference_bowtie_index <index>) [--gff <GFF3>]
-#                   --minquery <int> --maxquery <int> --rcode <R script>
-#                   --input <file ...> --ext <ext ...> --label <label ...> --normalization_factor <float ...>
-
-import sys, subprocess, argparse
-from smRtools import *
-from collections import OrderedDict, defaultdict
-import os
-
-def Parser():
-  the_parser = argparse.ArgumentParser()
-  the_parser.add_argument('--output_readmap', action="store", type=str, help="readmap dataframe")
-  the_parser.add_argument('--output_size_distribution', action="store", type=str, help="size distribution dataframe")
-  the_parser.add_argument('--reference_fasta', action="store", type=str, help="path to a fasta reference")
-  the_parser.add_argument('--reference_bowtie_index',action='store', help="path to a bowtie index of the reference")
-  the_parser.add_argument('--input',nargs='+', help="paths to multiple input files")
-  the_parser.add_argument('--ext',nargs='+', help="input file type")
-  the_parser.add_argument('--label',nargs='+', help="labels of multiple input files")
-  the_parser.add_argument('--normalization_factor',nargs='+', type=float, help="Normalization factor for input file")
-  the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
-  the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
-  the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
-  the_parser.add_argument('--rcode', type=str, help="R script")
-  args = the_parser.parse_args()
-  return args
-
-args=Parser()
-if args.reference_fasta:
-  genomeRefFormat = "fastaSource"
-  genomeRefFile = args.reference_fasta  
-if args.reference_bowtie_index:
-  genomeRefFormat = "bowtieIndex"
-  genomeRefFile = args.reference_bowtie_index  
-readmap_file=args.output_readmap
-size_distribution_file=args.output_size_distribution
-minquery=args.minquery
-maxquery=args.maxquery
-Rcode = args.rcode
-filePath=args.input
-fileExt=args.ext
-fileLabel=args.label
-normalization_factor=args.normalization_factor
-
-MasterListOfGenomes = OrderedDict()
-
-def process_samples(filePath):
-  for i, path in enumerate(filePath):
-    norm=normalization_factor[i]
-    print fileLabel[i]
-    MasterListOfGenomes[fileLabel[i]] = HandleSmRNAwindows (alignmentFile=path, alignmentFileFormat=fileExt[i], genomeRefFile=genomeRefFile, genomeRefFormat=genomeRefFormat,\
-                        biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm)
-  return MasterListOfGenomes
-
-def write_readplot_dataframe(readDict, readmap_file):
-  with open(readmap_file, 'w') as readmap:
-    print >>readmap, "gene\tcoord\tcount\tpolarity\tsample"
-    for sample in readDict.keys():
-      if args.gff:
-        gene_dict=readDict[sample]
-      else:
-        gene_dict=readDict[sample].instanceDict
-      for gene in gene_dict.keys():
-        plottable = gene_dict[gene].readplot()
-        for line in plottable:
-          print >>readmap, "%s\t%s" % (line, sample)
-
-def write_size_distribution_dataframe(readDict, size_distribution_file):
-  with open(size_distribution_file, 'w') as size_distrib:
-    print >>size_distrib, "gene\tpolarity\tsize\tcount\tsample"
-    for sample in readDict.keys():
-      if args.gff:
-        gene_dict=readDict[sample]
-      else:
-        gene_dict=readDict[sample].instanceDict
-      for gene in gene_dict.keys():
-        histogram = gene_dict[gene].size_histogram()
-        for polarity in histogram.keys():
-          for item in histogram[polarity].iteritems():
-            print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, polarity, item[0], item[1], sample)
-
-def gff_item_subinstances(readDict, gff3):
-  GFFinstanceDict=OrderedDict()
-  with open(gff3) as gff:
-    for line in gff:
-      if line[0] == "#": continue
-      gff_fields = line[:-1].split("\t")
-      chrom = gff_fields[0]
-      gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
-      item_upstream_coordinate = int(gff_fields[3])
-      item_downstream_coordinate = int(gff_fields[4])
-      item_polarity = gff_fields[6]
-      for sample in readDict.keys():
-        if sample not in GFFinstanceDict:
-          GFFinstanceDict[sample]={}
-        subinstance=extractsubinstance(item_upstream_coordinate, item_downstream_coordinate, readDict[sample].instanceDict[chrom])
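-        # for minus-strand features, mirror the read coordinates (key*-1) so positions follow the feature's own orientation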
-        if item_polarity == '-':
-          subinstance.readDict={key*-1:value for key, value in subinstance.readDict.iteritems()}
-        subinstance.gene=gff_name
-        GFFinstanceDict[sample][gff_name]=subinstance
-  return GFFinstanceDict
-
-MasterListOfGenomes=process_samples(filePath)
-
-if args.gff:
-  MasterListOfGenomes=gff_item_subinstances(MasterListOfGenomes, args.gff)
-
-write_readplot_dataframe(MasterListOfGenomes, readmap_file)
-write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file)
-
-R_command="Rscript "+ Rcode
-process = subprocess.Popen(R_command.split())
-process.wait()
-	
--- a/mississippi_gcc/readmap.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,225 +0,0 @@
-<tool id="Readmap" name="Generate readmap and histograms from alignment files" version="0.9.2">
-  <description>from sRbowtie alignment</description>
-  <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
-  <parallelism method="basic"></parallelism>
-<command interpreter="python">
-        readmap.py 
-	          #if $refGenomeSource.genomeSource == "history":
-         	    --reference_fasta  ## sys.argv[2]
-                    $refGenomeSource.ownFile ## index source
-          	  #else:
-                    #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
-		    --reference_bowtie_index
-                    $reference
-          	  #end if
-		  --rcode
-		  $plotCode
-		  --output_readmap
-		  $readmap_dataframe
-		  --output_size_distribution
-		  $size_distribution_dataframe
-		  --minquery
-		  $minquery
-		  --maxquery
-		  $maxquery
-		  --input
-		  #for $i in $refGenomeSource.series
-    		    $i.input 
-		  #end for
-		  --ext
-		  #for $i in $refGenomeSource.series
-    		    $i.input.ext 
-		  #end for
-		  --label
-		  #for $i in $refGenomeSource.series
-    		    "$i.input.name" 
-		  #end for
-		  --normalization_factor
-		  #for $i in $refGenomeSource.series
-    		    $i.norm
-		  #end for
-		  #if $gff:
-		    --gff
-                    $gff
-                  #end if
-
-</command>
-  <inputs>
-       <conditional name="refGenomeSource">
-           <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-               <option value="indexed">Use a built-in index</option>
-               <option value="history">Use one from the history</option>
-           </param>
-           <when value="indexed">
-	     <repeat name="series" title="Add alignment files">
-	       <param name="input" type="data" label="Select multiple alignments to parse">
-                  <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
-               </param>
-	       <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
-	     </repeat>
-           </when>
-           <when value="history">
-	     <repeat name="series" title="Add alignment files">
-	       <param name="input" type="data" label="Select multiple alignments to parse"/>
-	       <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple alignments"/>
-	     </repeat>
-	   </when>
-       </conditional>
-                <param name="gff" type="data" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
-                 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file databse do not match!"/> -->
-                <param name="minquery" type="integer" size="3" value="18" label="Min size of query small RNAs" help="'18' = 18 nucleotides"/>
-                <param name="maxquery" type="integer" size="3" value="28" label="Max size of query small RNAs" help="'28' = 28 nucleotides"/>
-                <param name="title" type="text" size="15" value= "Readmaps and size distributions" label="Main Titles"/>
-                <param name="xlabel" type="text" size="15" value="Coordinates/read size" label="x axis label"/>
-                <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
-                <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
-		  <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
-                </param>
-  </inputs>
-   <configfiles>
-     <configfile name="plotCode">
-      ## Setup R error handling to go to stderr
-      options( show.error.messages=F,
-               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-      library(RColorBrewer)
-      library(lattice)
-      library(latticeExtra)
-      library(grid)
-      library(gridExtra)
-      ##cheetahtemplate data frame implementation
-
-      rm=read.delim("${readmap_dataframe}", header=T, row.names=NULL)
-      pdf(file="${readmap_PDF}", paper="special", height=11.69, width=8.2677)
-      n_samples=length(unique(rm\$sample))
-
-      genes=unique(levels(rm\$gene))
-      per_gene_readmap=lapply(genes, function(x) subset(rm, gene==x))
-      n_genes=length(per_gene_readmap)
-      
-
-      par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), fontsize = list(text=96/${rows_per_page}, points=8))
-      par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-3), fontsize = list(text=96/${rows_per_page}, points=8))
-      par.settings.combination.readmap=list(layout.heights=list(top.padding=0, bottom.padding=-3), fontsize = list(text=96/${rows_per_page}, points=8))
-      par.settings.combination.size=list(layout.heights=list(top.padding=-2, bottom.padding=-0.5), fontsize = list(text=96/${rows_per_page}, points=8))
-      
-
-      plot_readmap=function(df, ...) {
-      combineLimits(xyplot(count~coord|factor(sample, levels=unique(sample))+reorder(gene, count, function(x) -sum(abs(x))), 
-      data=df, 
-      type='h', 
-      scales= list(relation="free", x=list(rot=0, cex=0.75, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.75)),
-      xlab=NULL, main=NULL, ylab=NULL, 
-      as.table=T, 
-      origin = 0, 
-      horizontal=FALSE, 
-      group=polarity,
-      col=c("red","blue"),
-      ...))
-      }
-
-      plot_size_distribution= function(df, ...) {
-          smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
-         bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
-          horizontal=FALSE,
-	  group=polarity,
-	  stack=TRUE,
-          col=c('red', 'blue'),
-          cex=0.75,
-          scales=list(y=list(tick.number=4, rot=90, relation="free"), cex=0.75),
-          prepanel=smR.prepanel,
-          xlab = NULL,
-          ylab = NULL,
-#          par.settings=list(layout.heights=list(top.padding=-2, bottom.padding=-3), fontsize = list(text=8, points=8)),
-          main = NULL , as.table=TRUE, newpage = T, ...)
-          combineLimits(bc)
-          }
-
-      for (i in seq(1,n_genes,${rows_per_page})) {
-        start=i
-        end=i+${rows_per_page}-1
-        if (end>n_genes) {end=n_genes}
-	readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.readmap))
-	args.list=c(readmap_plot.list, list(nrow=${rows_per_page}, ncol=1, main="${title}", left="${ylabel}", sub="readmap coordinate"))
-        do.call(grid.arrange, args.list)
-      }
-
-      devname=dev.off()
-
-      size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
-      per_gene_size=lapply(genes, function(x) subset(size, gene==x))
-
-      pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677)
-
-      for (i in seq(1,n_genes,${rows_per_page})) {
-        start=i
-        end=i+${rows_per_page}-1
-        if (end>n_genes) {end=n_genes}
-        plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size))
-        args.list=c(plot.list, list(nrow=${rows_per_page}, ncol=1, main="${title}", left="${ylabel}", sub="readsize in nucleotides"))
-        do.call(grid.arrange, args.list)
-      }
-
-      devname=dev.off()
-
-      pdf(file="${combi_PDF}", paper="special", height=11.69, width=8.2677)
-
-      for (i in seq(1,n_genes,${rows_per_page}/2)) {
-        start=i
-        end=i+${rows_per_page}/2-1
-        if (end>n_genes) {end=n_genes}
-	read_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, par.settings=par.settings.combination.readmap))
-        size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, strip=FALSE, par.settings=par.settings.combination.size))
-	plot.list=rbind(read_plot.list, size_plot.list )
-        args.list=c(plot.list, list(nrow=${rows_per_page}, ncol=1, main="${title}", left="${ylabel}", sub="${xlabel}"))
-        do.call(grid.arrange, args.list)
-      }
-
-      devname=dev.off()
-
-
-     </configfile>
-   </configfiles>
-
-   <outputs>
-   <data format="tabular" name="readmap_dataframe" label="Readmap dataframe"/>
-   <data format="tabular" name="size_distribution_dataframe" label="Size distributionn dataframe"/>
-   <data format="pdf" name="readmap_PDF" label="Readmaps"/>
-   <data format="pdf" name="size_PDF" label="Size distribution"/>
-   <data format="pdf" name="combi_PDF" label="Size distributiona and Readmaps"/>
-   </outputs>
-<help>
-
-**What it does**
-
-Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a "Readmap", 
-where by default for each "chromosome" the position of the read is recorded on the x-axis, and the y-axis indicates 
-the number of reads per position. Reads that map in sense are on the top, reads that map antisense are on the bottom.
-
-
-.. class:: warningmark
-
-'''TIP''' The input data can be produced using the sRbowtie tool.
-
-----
-
-'''Example'''
-
-For a SAM file such as the following::
-
-  5	16	2L_79	24393	255	17M	*	0	0	CCTTCATCTTTTTTTTT	IIIIIIIIIIIIIIIII	XA:i:0	MD:Z:17	NM:i:0
-
-  11	0	2R_1	12675	255	21M	*	0	0	AAAAAAAACGCGTCCTTGTGC	IIIIIIIIIIIIIIIIIIIII	XA:i:0	MD:Z:21	NM:i:0
-
-  2	16	2L_5	669	255	23M	*	0	0	TGTTGCTGCATTTCTTTTTTTTT	IIIIIIIIIIIIIIIIIIIIIII	XA:i:0	MD:Z:23	NM:i:0
-
-the tool produces a plot like this:
-
-----
-
-.. image:: static/images/readmap.png 
-    :height: 800 
-    :width: 500
-
-</help>
-</tool>
--- a/mississippi_gcc/sRbowtie.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-#!/usr/bin/env python
-# small RNA oriented bowtie wrapper
-# version 1.01 29-5-2014
-# Usage sRbowtie.py <1 input_fasta_file> <2 alignment method> <3 -v mismatches> <4 out_type> <5 buildIndexIfHistory> <6 fasta/bowtie index> <7 bowtie output> <8 ali_fasta> <9 unali_fasta> <10 --num-threads \${GALAXY_SLOTS:-4}>
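-# e.g. (hypothetical file names): sRbowtie.py clipped.fa multiple 1 bam history genome.fa output.bam None None 4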
-# current rev: for bowtie --norc, moved from --suppress 2,6,7,8 to --suppress 6,7,8. Future parsers must be updated to take this standardisation into account
-# To Do:
-# implement an arg parser
-# Christophe Antoniewski <drosofff@gmail.com>
-
-import sys, os, subprocess, tempfile, shutil
-
-def stop_err( msg ):
-    sys.stderr.write( '%s\n' % msg )
-    sys.exit()
-
-def bowtieCommandLiner (alignment_method, v_mis, out_type, aligned, unaligned, input, index, output, pslots="12"):
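-    # mapping of the tool's "alignment method" onto bowtie 1 options:
-    #   -v N                  allow N mismatches over the whole read, ignoring qualities
-    #   -M 1 --best --strata  report a single alignment, picked at random among the best-stratum hits
-    #   -m 1                  discard reads with more than one reportable alignment (unique mappers)
-    #   -k 1 / -a             report one / all valid alignment(s)
-    #   --norc                align to the forward strand only
-    #   --suppress 6,7,8      drop columns 6-8 (quality string, other-alignment count, mismatch descriptors) from tabular output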
-    if alignment_method=="RNA":
-        x = "-v %s -M 1 --best --strata -p %s --norc --suppress 6,7,8" % (v_mis, pslots)
-    elif alignment_method=="unique":
-        x =  "-v %s -m 1 -p %s --suppress 6,7,8" % (v_mis, pslots)
-    elif  alignment_method=="multiple":
-        x = "-v %s -M 1 --best --strata -p %s --suppress 6,7,8" % (v_mis, pslots)
-    elif alignment_method=="k_option":
-        x = "-v %s -k 1 --best -p %s --suppress 6,7,8" % (v_mis, pslots)
-    elif alignment_method=="n_option":
-        x = "-n %s -M 1 --best -p %s --suppress 6,7,8" % (v_mis, pslots)
-    elif alignment_method=="a_option":
-        x = "-v %s -a --best -p %s --suppress 6,7,8" % (v_mis, pslots)
-    if aligned == "None" and unaligned == "None": fasta_command = ""
-    elif aligned != "None" and unaligned == "None": fasta_command= " --al %s" % aligned
-    elif aligned == "None" and unaligned != "None": fasta_command = " --un %s" % unaligned
-    else: fasta_command = " --al %s --un %s" % (aligned, unaligned)
-    x = x + fasta_command
-    if out_type == "tabular":
-        return "bowtie %s %s -f %s > %s" % (x, index, input, output)
-    elif out_type=="sam":
-        return "bowtie %s -S %s -f %s > %s" % (x, index, input, output)
-    elif out_type=="bam":
-        return "bowtie %s -S %s -f %s |samtools view -bS - > %s" % (x, index, input, output)
-
-def bowtie_squash(fasta):
-  tmp_index_dir = tempfile.mkdtemp() # make temp directory for bowtie indexes
-  ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
-  ref_file_name = ref_file.name
-  ref_file.close() # by default, delete the temporary file, but ref_file.name is now stored in ref_file_name
-  os.symlink( fasta, ref_file_name ) # symlink between the fasta source file and the deleted ref_file name
-  cmd1 = 'bowtie-build -f %s %s' % (ref_file_name, ref_file_name ) # bowtie command line, which will work after changing dir (cwd=tmp_index_dir)
-  try:
-    FNULL = open(os.devnull, 'w')
-    tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name # a path string for a temp file in tmp_index_dir. Just a string
-    tmp_stderr = open( tmp, 'wb' ) # creates and open a file handler pointing to the temp file
-    proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=FNULL, stdout=FNULL ) # both stderr and stdout of bowtie-build are redirected in  dev/null
-    returncode = proc.wait()
-    tmp_stderr.close()
-    FNULL.close()
-    sys.stdout.write(cmd1 + "\n")
-  except Exception, e:
-    # clean up temp dir
-    if os.path.exists( tmp_index_dir ):
-      shutil.rmtree( tmp_index_dir )
-    stop_err( 'Error indexing reference sequence\n' + str( e ) )
-  # no Cleaning if no Exception, tmp_index_dir has to be cleaned after bowtie_alignment()
-  index_full_path = os.path.join(tmp_index_dir, ref_file_name) # bowtie-style path without extension
-  return tmp_index_dir, index_full_path  
-  
-def bowtie_alignment(command_line, flyPreIndexed=''):
-  # make temp directory just for stderr
-  tmp_index_dir = tempfile.mkdtemp()
-  tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
-  tmp_stderr = open( tmp, 'wb' )
-  # conditional statement for sorted bam generation viewable in Trackster
-  if "samtools" in command_line:
-    target_file = command_line.split()[-1] # recover the final output file name
-    path_to_unsortedBam = os.path.join(tmp_index_dir, "unsorted.bam")
-    path_to_sortedBam = os.path.join(tmp_index_dir, "unsorted.bam.sorted")
-    first_command_line = " ".join(command_line.split()[:-3]) + " -o " + path_to_unsortedBam + " - "
-    # example: bowtie -v 0 -M 1 --best --strata -p 12 --suppress 6,7,8 -S /home/galaxy/galaxy-dist/bowtie/Dmel/dmel-all-chromosome-r5.49 -f /home/galaxy/galaxy-dist/database/files/003/dataset_3460.dat |samtools view -bS -o /tmp/tmp_PgMT0/unsorted.bam -
-    second_command_line = "samtools sort  %s %s" % (path_to_unsortedBam, path_to_sortedBam) # generates an "unsorted.bam.sorted.bam file", NOT an "unsorted.bam.sorted" file
-    p = subprocess.Popen(args=first_command_line, cwd=tmp_index_dir, shell=True, stderr=tmp_stderr.fileno()) # fileno() method return the file descriptor number of tmp_stderr
-    returncode = p.wait()
-    sys.stdout.write("%s\n" % first_command_line + str(returncode))
-    p = subprocess.Popen(args=second_command_line, cwd=tmp_index_dir, shell=True, stderr=tmp_stderr.fileno())
-    returncode = p.wait()
-    sys.stdout.write("\n%s\n" % second_command_line + str(returncode))
-    if os.path.isfile(path_to_sortedBam + ".bam"):
-      shutil.copy2(path_to_sortedBam + ".bam", target_file)
-  else:
-    p = subprocess.Popen(args=command_line, shell=True, stderr=tmp_stderr.fileno())
-    returncode = p.wait()
-    sys.stdout.write(command_line + "\n")
-  tmp_stderr.close()
-  ## cleaning if the index was created in the fly
-  if os.path.exists( flyPreIndexed ):
-    shutil.rmtree( flyPreIndexed )
-  # cleaning tmp files and directories
-  if os.path.exists( tmp_index_dir ):
-    shutil.rmtree( tmp_index_dir )
-  return
-
-def __main__():
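-  # argv layout (from the sRbowtie.xml command line): 1 input fasta, 2 method,
-  # 3 v_mismatches, 4 output_type, 5 index source type ("history" or built-in),
-  # 6 index path or fasta, 7 output, 8 aligned fasta, 9 unaligned fasta, 10 bowtie threads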
-  F = open (sys.argv[7], "w")
-  if sys.argv[5] == "history":
-    tmp_dir, index_path = bowtie_squash(sys.argv[6])
-  else:
-    tmp_dir, index_path = "dummy/dummy", sys.argv[6] # no on-the-fly index: tmp_dir is a placeholder path that never exists
-  command_line = bowtieCommandLiner(sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[8], sys.argv[9], sys.argv[1], index_path, sys.argv[7], sys.argv[10])
-  bowtie_alignment(command_line, flyPreIndexed=tmp_dir)
-  F.close()
-if __name__=="__main__": __main__()
--- a/mississippi_gcc/sRbowtie.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,210 +0,0 @@
-<tool id="bowtieForSmallRNA" name="sRbowtie" version="1.0.1">
-  <description>for FASTA small reads</description>
-  <requirements>
-	<requirement type='package'>bowtie</requirement>
-  </requirements>
-  <parallelism method="basic"></parallelism>
-  <command interpreter="python"> sRbowtie.py $input
-                                                    $method
-                                                    $v_mismatches
-                                                    $output_type
-                                                    $refGenomeSource.genomeSource
-                                                    ## the very source of the index (indexed or fasta file)
-                                                    #if $refGenomeSource.genomeSource == "history":
-                                                        $refGenomeSource.ownFile
-                                                    #else:
-                                                        $refGenomeSource.index.fields.path
-                                                    #end if
-                                                    $output
-                                                    $aligned
-                                                    $unaligned
-						    \${GALAXY_SLOTS:-4} ## number of processors to be handled by bowtie
-  </command>
-  <inputs>
-      <param name="input" type="data" format="fasta" label="Input fasta file: reads clipped from their adapter" help="Only with clipped, raw fasta files"/>
-<!-- which method will be used --> 
-      <param name="method" type="select" label="What kind of matching do you want to do?" help="bowtie parameters adjusted to the type of matching. RNA option match to only one strand">
-        <option value="RNA">Match on sense strand RNA reference index, multiple mappers randomly matched at a single position</option>
-        <option value="unique">Match unique mappers on DNA reference index</option>
-        <option value="multiple" selected="true">Match on DNA, multiple mappers randomly matched at a single position</option>
-        <option value="k_option">Match on DNA as fast as possible, without taking care of mapping issues (for raw annotation of reads)</option>
-        <option value="n_option">Match on DNA - RNAseq mode (-n bowtie option)</option>
-        <option value="a_option">Match and report all valid alignments</option>
-      </param>
-<!-- END of which method will be used -->
-    <param name="v_mismatches" type="select" label="Number of mismatches allowed" help="specify the -v bowtie option">
-        <option value="0">0</option>
-        <option value="1" selected="true">1</option>
-        <option value="2">2</option>
-        <option value="3">3</option>
-    </param>
-<!-- bowtie index selection -->
-    <conditional name="refGenomeSource">
-      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact GED team">
-          <options from_data_table="bowtie_indexes">
-      <!--      <filter type="sort_by" column="2" />
-            <validator type="no_options" message="No indexes are available" /> -->
-          </options>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
-      </when>
-    </conditional>
-<!-- END bowtie index selection -->
-       <param name="output_type" type="select" label="Select output format" help="Note that the BAM will be viewable in trackster only if you choose a full genome referenced for Trackster usage. see the doc below">
-          <option value="tabular" select="true">tabular</option>
-          <option value="sam">sam</option>
-          <option value="bam">bam</option>
-       </param>
-       <param name="additional_fasta" type="select" label="additional fasta output" help="to get aligned and unaligned reads in fasta format">
-          <option value="No" select="true">No</option>
-          <option value="al">aligned</option>
-          <option value="unal">unaligned</option>
-          <option value="al_and_unal">both aligned and unaligned</option>
-       </param>
-   </inputs>
-   <outputs>
-   <data format="tabular" name="output" label="Bowtie Output">
-        <change_format>
-            <when input="output_type" value="sam" format="sam" />
-            <when input="output_type" value="bam" format="bam" />
-        </change_format>
-<!-- Set metadata based on reference genome -->
-      <actions>
-        <conditional name="refGenomeSource.genomeSource">
-          <when value="indexed">
-            <action type="metadata" name="dbkey">
-              <option type="from_data_table" name="bowtie_indexes" column="1" offset="0">
-                <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/>
-                <filter type="param_value" ref="refGenomeSource.index" column="0"/>
-              </option>
-            </action>
-          </when>
-          <when value="history">
-            <action type="metadata" name="dbkey">
-              <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" />
-            </action>
-          </when>
-        </conditional>
-      </actions>
-   </data>
-   <data format="fasta" name="aligned" label="Matched reads">
-	<filter>additional_fasta == "al" or additional_fasta == "al_and_unal"</filter>
-   </data>
-   <data format="fasta" name="unaligned" label ="Unmatched reads">
-        <filter>additional_fasta == "unal" or additional_fasta == "al_and_unal"</filter>
-   </data>
-   </outputs>
-
-  <tests>
-    <test>
-      <param name="genomeSource" value="indexed" />
-      <param name="index" value="/home/galaxy/galaxy-dist/bowtie/Dmel/dmel-all-chromosome-r5.49" />
-      <param name="method" value="multiple" />
-      <param name="input" ftype="fasta" value="sRbowtie.fa" />
-      <param name="v_mismatches" value="1" />
-      <param name="output_type" value="tabular" />
-      <output name="output" ftype="tabular" value="sRbowtie.out" />
-      <output name="aligned" value="None" />
-      <output name="unaligned" value="None" />
-    </test>
-  </tests>
-
-  <help>
-
-**What it does**
-
-Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25.
-
-.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
-
-A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution.
-
-However, this Bowtie wrapper tool only takes FASTQ files as inputs.
-
-The sRbowtie wrapper works specifically with short read FASTA inputs (bowtie -v mode)
-
-------
-
-**OPTIONS**
-
-.. class:: infomark
-
-This script uses Bowtie to match reads on a reference index.
-
-Depending on the type of matching, different bowtie options are used:
-
-**Match on sense strand RNA reference index, multiple mappers randomly matched at a single position**
-
-Match on the RNA reference, SENSE strand only, randomly attributing each multiple mapper to a target with the fewest mismatches; the polarity column is suppressed in the bowtie tabular report:
-
-*-v [0,1,2,3] -M 1 --best --strata -p 12 --norc --suppress 2,6,7,8*
-
-**Match unique mappers on DNA reference index**
-
-Match ONLY unique mappers on DNA reference index
-
-*-v [0,1,2,3] -m 1 -p 12 --suppress 6,7,8*
-
-Note that using this option with -v values other than 0 is questionable...
-
-**Match on DNA, multiple mappers randomly matched at a single position**
-
-Match multiple mappers, randomly attributing each multiple mapper to a target with the fewest mismatches; the number of mismatches allowed is specified by the -v option:
-
-*-v [0,1,2,3] -M 1 --best --strata -p 12 --suppress 6,7,8*
-
-**Match on DNA as fast as possible, without taking care of mapping issues (for raw annotation of reads)**
-
-Match at the highest speed, trading the guarantee of reporting the best hit for speed:
-
-*-v [0,1,2,3] -k 1 --best -p 12 --suppress 6,7,8*
-
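-As a minimal sketch (with illustrative names; this is not the wrapper's actual bowtieCommandLiner function), the method-to-options dispatch above could be written as::
-
-    def method_to_options(method, v_mis="1", pslots="4"):
-        # map the tool's matching method to the bowtie flags listed above
-        options = {
-            "RNA":      "-v %s -M 1 --best --strata -p %s --norc --suppress 2,6,7,8",
-            "unique":   "-v %s -m 1 -p %s --suppress 6,7,8",
-            "multiple": "-v %s -M 1 --best --strata -p %s --suppress 6,7,8",
-            "k_option": "-v %s -k 1 --best -p %s --suppress 6,7,8",
-        }
-        return options[method] % (v_mis, pslots)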
-
------
-
-**Input formats**
-
-.. class:: warningmark
-
-*The only accepted format for the script is a raw fasta list of reads, clipped from their adapter*
-
------
-
-**OUTPUTS**
-
-If you choose tabular as the output format, you will obtain the matched reads in standard bowtie output format, with the following columns::
-
-    Column      Description
-  ----------    --------------------------------------------------------
-   1 FastaID    fasta identifier of the read
-   2 polarity   + or - depending on whether the match is on the forward or reverse strand
-   3 target     name of the matched target
-   4 Offset     0-based coordinate of the match on the target sequence
-   5 Seq        sequence of the matched read
-
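-For instance, a line of this tabular output can be split into its columns with a few lines of Python (a sketch; the read shown is made up)::
-
-    line = "read_1\t+\t2L\t10493\tTGATTGTTGTT"
-    FastaID, polarity, target, offset, seq = line.rstrip().split("\t")
-    offset = int(offset)  # 0-based coordinate on the target
-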
-If you choose SAM, you will get the output in unordered SAM format.
-
-.. class:: warningmark
-
-If you choose BAM, the output will be in sorted BAM format.
-To be viewable in Trackster, several conditions must be fulfilled:
-
-.. class:: infomark
-
-Reads must have been matched to a genome whose chromosome names are compatible with Trackster genome indexes
-
-.. class:: infomark
-
-The database/build (dbkey) indicated for the dataset (pencil icon - Database/Build field) must match a Trackster genome index.
-
-Please contact the Galaxy instance administrator if your genome is not referenced.
-
-**Matched and unmatched fasta reads can be retrieved, for further analyses**
-
-  </help>
-</tool>
--- a/mississippi_gcc/sRbowtieCascade.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-#!/usr/bin/env python
-# small RNA oriented bowtie wrapper in cascade for small RNA data set genome annotation
-# version 0.9 13-6-2014
-# Usage sRbowtie_cascade.py see Parser() for valid arguments
-# Christophe Antoniewski <drosofff@gmail.com>
-
-import sys, os, subprocess, tempfile, shutil, argparse
-from collections import defaultdict
-
-def Parser():
-  the_parser = argparse.ArgumentParser()
-  the_parser.add_argument('--output', action="store", type=str, help="output file")
-  the_parser.add_argument('--num-threads', dest="num_threads", action="store", type=str, help="number of bowtie threads")
-  the_parser.add_argument('--mismatch', action="store", type=str, help="number of mismatches allowed")
-  the_parser.add_argument('--indexing-flags', dest="indexing_flags", nargs='+', help="whether the index should be generated or not by bowtie-build")
-  the_parser.add_argument('--index',nargs='+', help="paths to indexed or fasta references")
-  the_parser.add_argument('--indexName',nargs='+', help="Names of the indexes")
-  the_parser.add_argument('--input',nargs='+', help="paths to multiple input files")
-  the_parser.add_argument('--label',nargs='+', help="labels of multiple input files")
-  args = the_parser.parse_args()
-  return args
- 
-def stop_err( msg ):
-  sys.stderr.write( '%s\n' % msg )
-  sys.exit()
-
-def bowtie_squash(fasta):
-  tmp_index_dir = tempfile.mkdtemp() # make temp directory for bowtie indexes
-  ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
-  ref_file_name = ref_file.name
-  ref_file.close() # closing deletes the temporary file, but its name is kept in ref_file_name
-  os.symlink( fasta, ref_file_name ) # symlink between the fasta source file and the deleted ref_file name
-  cmd1 = 'bowtie-build -f %s %s' % (ref_file_name, ref_file_name ) # bowtie command line, which will work after changing dir (cwd=tmp_index_dir)
-  try:
-    FNULL = open(os.devnull, 'w')
-    tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name # a path string for a temp file in tmp_index_dir. Just a string
-    tmp_stderr = open( tmp, 'wb' ) # creates and opens a file handle pointing to the temp file (leftover: not actually passed to Popen below)
-    proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=FNULL, stdout=FNULL ) # both stderr and stdout of bowtie-build are redirected to /dev/null
-    returncode = proc.wait()
-    tmp_stderr.close()
-    FNULL.close()
-    sys.stdout.write(cmd1 + "\n")
-  except Exception, e:
-    # clean up temp dir
-    if os.path.exists( tmp_index_dir ):
-      shutil.rmtree( tmp_index_dir )
-    stop_err( 'Error indexing reference sequence\n' + str( e ) ) # report the failure whether or not the temp dir existed
-  # no cleaning if there was no exception: the temp index dir is removed at the end of the cascade
-  index_full_path = os.path.join(tmp_index_dir, ref_file_name) # bowtie-style path without extension
-  return index_full_path  
-  
-def make_working_dir():
-  working_dir = tempfile.mkdtemp()
-  return working_dir
-  
-def Clean_TempDir(directory):
-  if os.path.exists( directory ):
-    shutil.rmtree( directory )
-  return
-
-def bowtie_alignment(command_line="None", working_dir = ""):
-  FNULL = open(os.devnull, 'w')
-  p = subprocess.Popen(args=command_line, cwd=working_dir, shell=True, stderr=FNULL, stdout=FNULL)
-  returncode = p.wait()
-  sys.stdout.write("%s\n" % command_line)
-  FNULL.close()
-  #p = subprocess.Popen(["wc", "-l", "%s/al.fasta"%working_dir], cwd=working_dir, stdout=subprocess.PIPE)
-  #aligned =  p.communicate()[0].split()[0]
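-  # a fasta record spans two lines (header + sequence), hence the final division by 2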
-  aligned = 0
-  F = open ("%s/al.fasta" % working_dir, "r")
-  for line in F:
-    aligned += 1
-  F.close()
-  sys.stdout.write("Aligned: %s\n" % aligned)
-  return aligned/2
-
-def CommandLiner (v_mis="1", pslots="12", index="dum/my", input="dum/my", working_dir=""):
-  return "bowtie -v %s -k 1 --best -p %s --al %s/al.fasta --un %s/unal.fasta --suppress 1,2,3,4,5,6,7,8 %s -f %s" % (v_mis, pslots, working_dir, working_dir, index, input)
-
-def __main__():
-  args = Parser()
-  ## first we make all indexes available. They can be already available or be squashed by bowtie-build
-  ## we keep them in a list that alternates indexPath and "toClear" or "DoNotDelete"
-  BowtieIndexList = []
-  for indexing_flags, bowtiePath in zip (args.indexing_flags, args.index):
-    if indexing_flags == "history":
-      BowtieIndexList.append ( bowtie_squash (bowtiePath) )
-      BowtieIndexList.append ( "toClear" )
-    else:
-      BowtieIndexList.append ( bowtiePath )
-      BowtieIndexList.append ( "DoNotDelete") 
-  ###### temporary Indexes are generated. They must be deleted at the end (after removing file name in the temp path) 
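-  ## e.g. (illustrative): BowtieIndexList = ["/tmp/tmpXYZ/tmpABC", "toClear", "/data/indexes/dm3", "DoNotDelete"]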
-  ResultDict = defaultdict(list)
-  for label, input in zip(args.label, args.input): ## the main cascade, iterating over samples and bowtie indexes
-    workingDir = make_working_dir()
-    cmd = CommandLiner (v_mis=args.mismatch, pslots=args.num_threads, index=BowtieIndexList[0], input=input, working_dir=workingDir)
-    ResultDict[label].append( bowtie_alignment(command_line=cmd, working_dir = workingDir) ) # first step of the cascade
-    if len(BowtieIndexList) > 2: # is there a second step to perform?
-      os.rename("%s/al.fasta"%workingDir, "%s/toAlign.fasta"%workingDir) ## end of the first step: the reads aligned to the first index become the input of the next step
-      cmd = CommandLiner (v_mis=args.mismatch, pslots=args.num_threads, index=BowtieIndexList[2], input="%s/toAlign.fasta"%workingDir, working_dir=workingDir)
-      ResultDict[label].append( bowtie_alignment(command_line=cmd, working_dir = workingDir) )## second step of the cascade
-    if len(BowtieIndexList) > 4:  ## remaining steps
-      for BowtieIndexPath in BowtieIndexList[4::2]:
-        os.rename("%s/unal.fasta"%workingDir, "%s/toAlign.fasta"%workingDir)
-        cmd = CommandLiner (v_mis=args.mismatch, pslots=args.num_threads, index=BowtieIndexPath, input="%s/toAlign.fasta"%workingDir, working_dir=workingDir)
-        ResultDict[label].append( bowtie_alignment(command_line=cmd, working_dir = workingDir) )
-    Fun = open("%s/unal.fasta"%workingDir, "r") ## to finish, compute the number of unmatched reads
-    n = 0
-    for line in Fun:
-      n += 1
-    ResultDict[label].append(n/2)
-    Fun.close()
-    Clean_TempDir (workingDir) # clean the sample working directory
-  ## cleaning
-  for IndexPath, IndexFlag in zip(BowtieIndexList[::2], BowtieIndexList[1::2]):
-    if IndexFlag == "toClear":
-      Clean_TempDir ("/".join(IndexPath.split("/")[:-1]))
-  ## end of cleaning
-
-  F = open (args.output, "w")
-  print >> F, "alignment reference\t%s" % "\t".join(args.label)
-  for i, reference in enumerate(args.indexName):
-    F.write ("%s" % reference)
-    for sample in args.label:
-      F.write ("\t%s" % "{:,}".format(ResultDict[sample][i]) )
-    print >> F
-  F.write ("Remaining Unmatched")
-  for sample in args.label:
-    F.write ("\t%s" % "{:,}".format(ResultDict[sample][-1]) ) 
-  print >> F
-
-  F.close()
-
-if __name__=="__main__": __main__()
--- a/mississippi_gcc/sRbowtieCascade.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,142 +0,0 @@
-<tool id="sRbowtie_cascade" name="Annotate smRNA datasets" version="0.9.0">
-  <description>Using iterative sRbowtie Alignments</description>
-  <requirements>
-	<requirement type='package'>bowtie</requirement>
-  </requirements>
-  <parallelism method="basic"></parallelism>
-  <command interpreter="python"> sRbowtieCascade.py --output $output
-                                                    --num-threads \${GALAXY_SLOTS:-4} ## number of processors to be handled by bowtie
-                                                    --mismatch $mismatches
-                                                    --input
-						    #for $i in $input:
-						      $i
-						    #end for
-                                                   --label
-                                                    #for $i in $input:
-                                                      "$i.name"
-                                                    #end for
-                                                   --index
-                                                    #if $refGenomeSource1.genomeSource == "history":
-                                                      $refGenomeSource1.ownFile
-                                                    #else:
-                                                      $refGenomeSource1.index.fields.path
-                                                    #end if
-                                                    #for $i in $AdditionalQueries:
-						      #if $i.refGenomeSource.genomeSource == "history":
-						        $i.refGenomeSource.ownFile
-                                                      #else:
-                                                        $i.refGenomeSource.index.fields.path
-                                                      #end if
-                                                    #end for
-                                                   --indexing-flags
-						    $refGenomeSource1.genomeSource
-                                                    #for $i in $AdditionalQueries:
-						      $i.refGenomeSource.genomeSource
-                                                    #end for
-                                                   --indexName
-                                                    #if $refGenomeSource1.genomeSource == "history":
-                                                      "$refGenomeSource1.ownFile.name"
-                                                    #else:
-                                                      "$refGenomeSource1.index.fields.name"
-                                                    #end if
-                                                    #for $i in $AdditionalQueries:
-                                                      #if $i.refGenomeSource.genomeSource == "history":
-                                                        "$i.refGenomeSource.ownFile.name"
-                                                      #else:
-                                                        "$i.refGenomeSource.index.fields.name"
-                                                      #end if
-                                                    #end for
-  </command>
-  <inputs>
-      <param name="input" type="data" format="fasta" label="Input fasta file: reads clipped from their adapter" help="Only with clipped, raw fasta files" multiple="true"/>
-    <param name="mismatches" type="select" label="Number of mismatches allowed" help="specify the number of mismatches allowed during alignments">
-        <option value="0">0</option>
-        <option value="1" selected="true">1</option>
-        <option value="2">2</option>
-        <option value="3">3</option>
-    </param>
-<!-- First bowtie index selection -->
-    <conditional name="refGenomeSource1">
-      <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-        <option value="indexed">Use a built-in index</option>
-        <option value="history">Use one from the history</option>
-      </param>
-      <when value="indexed">
-        <param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact GED team">
-          <options from_data_table="bowtie_indexes"/>
-        </param>
-      </when>
-      <when value="history">
-        <param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
-      </when>
-    </conditional>
-<!-- End of first bowtie index selection -->
-<!-- other  bowtie index selections -->
-    <repeat name="AdditionalQueries" title="Additional Alignment Step">
-	<conditional name="refGenomeSource">
-      		<param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-			<option value="indexed">Use a built-in index</option>
-			<option value="history">Use one from the history</option>
-		</param>
-		<when value="indexed">
-			<param name="index" type="select" label="Select a DNA reference index" help="if your genome of interest is not listed - contact GED team">
-				<options from_data_table="bowtie_indexes"/>
-			</param>
-		</when>
-		<when value="history">
-			<param name="ownFile" type="data" format="fasta" label="Select a fasta file, to serve as index reference" />
-		</when>
-        </conditional>
-    </repeat>
-<!-- End of other bowtie index selections -->
-   </inputs>
-   <outputs>
-   <data format="tabular" name="output" label="Cascade Annotation Analysis"/>
-   </outputs>
-
-  <tests>
-    <test>
-    </test>
-  </tests>
-
-  <help>
-
-**Intro**
-
-Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient.
-A generic "Map with Bowtie for Illumina" Galaxy tool is available in the main Galaxy distribution.
-However, this Bowtie wrapper tool only takes FASTQ files as inputs.
-
-Here, the sRbowtie wrapper works specifically with short read FASTA inputs (bowtie -v mode, with -k 1)
-
-.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml
-
-
-------
-
-**What it does**
-
-.. class:: infomark
-
-This tool uses the sRbowtie wrapper to iteratively match reads against a series of reference indexes.
-
-Reads are matched on DNA references as fast as possible, without regard to multiple-mapping issues:
-
-*-v [0,1,2,3] -k 1 --best -p 12 --suppress 6,7,8*
-
-Unaligned reads at step N are used as input for sRbowtie at step N+1
-
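-Schematically, the cascade boils down to the following loop (a toy sketch with made-up reads and "indexes", not the actual sRbowtieCascade.py code)::
-
-    def align(reads, index):
-        # hypothetical one-step aligner: returns (aligned, unaligned) read lists
-        hits = set(index)
-        return [r for r in reads if r in hits], [r for r in reads if r not in hits]
-
-    reads = ["TGATT", "AAACG", "GGGGG"]       # toy input reads
-    counts = []
-    for index in (["TGATT"], ["AAACG"]):      # one toy "index" per cascade step
-        aligned, reads = align(reads, index)  # step N+1 gets the leftovers of step N
-        counts.append(len(aligned))
-    counts.append(len(reads))                 # remaining unmatched reads
-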
------
-
-**Input formats**
-
-.. class:: warningmark
-
-*The only accepted format for the script is a raw fasta list of reads, clipped from their adapter*
-
------
-
-**OUTPUTS**
-
-**Annotation table**
-
-  </help>
-</tool>
--- a/mississippi_gcc/sRbowtieParser.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-#!/usr/bin/python
-# python parser module to analyse sRbowtie alignments
-# version 0.9
-# Usage sRbowtieParser.py  <1:index source> <2:extraction directive> <3:output> <4:polarity> <5:6:7 filePath:FileExt:FileLabel> <.. ad lib>
-
-import sys
-from smRtools import *
-
-IndexSource = sys.argv[1]
-ExtractionDirective = sys.argv[2]
-if ExtractionDirective == "--do_not_extract_index":
-  genomeRefFormat = "fastaSource"
-elif  ExtractionDirective == "--extract_index":
-  genomeRefFormat = "bowtieIndex"
-Output = sys.argv[3]
-Polarity = sys.argv[4] # may be "both", "forward" or "reverse"
-Triplets = [sys.argv[5:][i:i+3] for i in xrange(0, len(sys.argv[5:]), 3)]
-MasterListOfGenomes = {}
-
-for [filePath, FileExt, FileLabel] in Triplets:
-  MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (filePath, FileExt, IndexSource, genomeRefFormat) 
-
-header = ["gene"]
-for [filePath, FileExt, FileLabel] in Triplets:
-  header.append(FileLabel)
-
-F = open (sys.argv[3], "w")
-print >> F, "\t".join(header)
-for item in sorted (MasterListOfGenomes[header[1]].instanceDict.keys() ):
-  line=[item]
-  for sample in header[1:]:
-    count = str (MasterListOfGenomes[sample].instanceDict[item].readcount(polarity=Polarity))
-    line.append(count)
-  print >> F,  "\t".join(line )
-F.close()
--- a/mississippi_gcc/sRbowtieParser.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,67 +0,0 @@
-<tool id="sRbowtieParser" name="Parse items in sRbowtie alignment" version="0.9.1">
-  <description></description>
-  <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
-  <parallelism method="basic"></parallelism>
-<command interpreter="python">
-        sRbowtieParser.py 
-	          #if $refGenomeSource.genomeSource == "history":
-                    $refGenomeSource.ownFile ## index source
-         	    --do_not_extract_index
-          	  #else:
-		    #silent reference= filter( lambda x: str( x[0] ) == str( $input_list.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
-            	    $reference   ## index source
-            	    --extract_index
-          	  #end if
-		  $output
-		  $polarity
-		  #for $i in $refGenomeSource.input_list
-    		    $i $i.ext "$i.name"
-		  #end for
-
-</command>
-  <inputs>
-       <conditional name="refGenomeSource">
-           <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-               <option value="indexed">Use a built-in index</option>
-               <option value="history">Use one from the history</option>
-           </param>
-           <when value="indexed">
-               <param name="input_list" type="data" label="Select multiple alignments to parse" multiple="true">
-                  <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
-               </param>
-           </when>
-           <when value="history">
-                <param name="ownFile" type="data" format="fasta"  label="Select the fasta reference" />
-	        <param name="input_list" type="data" label="Select multiple alignments to parse" multiple="true"/>
-           </when>
-       </conditional>  <!-- refGenomeSource -->
-       <param name="polarity" type="select" label="how to count sense and antisense reads">
-         <option value="both">count both sense and antisense reads</option>
-         <option value="forward">count only sense reads</option>
-         <option value="reverse">count only antisense reads</option>
-       </param>
-   </inputs>
-   <outputs>
-   <data format="tabular" name="output" label="Read Count  Lists"/>
-   </outputs>
-  <help>
-
-**What it does**
-
-Parses read counts from one or several sRbowtie alignments (in tabular, SAM or BAM format).
-
-A bowtie alignment made against an index built from a set of items is parsed and reported as a list of read counts for the corresponding items.
-
-Sense, antisense, or both sense and antisense alignments can be counted.
-
-The library labels are inferred from the input dataset names in the Galaxy history.
-
-**It is thus essential that input datasets are appropriately renamed**
-
-**It is preferable not to put any spaces in these input dataset names. You may edit the names in the history**
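-
-As an illustration (a sketch, not the tool's actual parsing code), counting sense reads per item from a tabular sRbowtie alignment amounts to::
-
-    from collections import defaultdict
-    counts = defaultdict(int)
-    with open("alignment.tabular") as f:  # hypothetical input file name
-        for line in f:
-            fields = line.split("\t")
-            polarity, item = fields[1], fields[2]
-            if polarity == "+":           # "forward" mode: count sense reads only
-                counts[item] += 1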
-
-
-
-  </help>
-</tool>
-
--- a/mississippi_gcc/size_histogram.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,126 +0,0 @@
-#!/usr/bin/python
-# python parser module for size distributions, guided by GFF3
-# version 0.9.1 (1-6-2014)
-# Usage: size_histogram.py, see Parser() below for the valid arguments
-
-import sys, subprocess, argparse
-from smRtools import *
-from collections import OrderedDict, defaultdict
-import os
-
-def Parser():
-  the_parser = argparse.ArgumentParser()
-  the_parser.add_argument('--output_size_distribution', action="store", type=str, help="size distribution dataframe")
-  the_parser.add_argument('--reference_fasta', action="store", type=str, help="reference fasta file")
-  the_parser.add_argument('--reference_bowtie_index',action='store', help="path to the indexed reference")
-  the_parser.add_argument('--input',nargs='+', help="paths to multiple input files")
-  the_parser.add_argument('--ext',nargs='+', help="input file type")
-  the_parser.add_argument('--label',nargs='+', help="labels of multiple input files")
-  the_parser.add_argument('--normalization_factor',nargs='+', type=float, help="Normalization factor for input file")
-  the_parser.add_argument('--gff', type=str, help="GFF containing regions of interest")
-  the_parser.add_argument('--minquery', type=int, help="Minimum readsize")
-  the_parser.add_argument('--maxquery', type=int, help="Maximum readsize")
-  the_parser.add_argument('--rcode', type=str, help="R script")
-  the_parser.add_argument('--global_size', action="store_true", help="if specified, the size distribution is calculated for the sum of all items")
-  the_parser.add_argument('--collapse', action="store_true", help="if specified, forward and reverse reads are collapsed")
-  args = the_parser.parse_args()
-  return args
-
-args=Parser()
-if args.reference_fasta:
-  genomeRefFormat = "fastaSource"
-  genomeRefFile = args.reference_fasta  
-if args.reference_bowtie_index:
-  genomeRefFormat = "bowtieIndex"
-  genomeRefFile = args.reference_bowtie_index  
-size_distribution_file=args.output_size_distribution
-minquery=args.minquery
-maxquery=args.maxquery
-Rcode = args.rcode
-filePath=args.input
-fileExt=args.ext
-fileLabel=args.label
-normalization_factor=args.normalization_factor
-global_size=args.global_size
-collapse=args.collapse
-
-if collapse:
-  pol=["both"]
-else:
-  pol=["F", "R"]
-
-MasterListOfGenomes = OrderedDict()
-
-def process_samples(filePaths):
-  for i, filePath in enumerate(filePaths):
-    norm=normalization_factor[i]
-    print fileLabel[i]
-    MasterListOfGenomes[fileLabel[i]] = HandleSmRNAwindows (alignmentFile=filePath, alignmentFileFormat=fileExt[i], genomeRefFile=genomeRefFile, genomeRefFormat=genomeRefFormat,\
-                        biosample=fileLabel[i], size_inf=minquery, size_sup=maxquery, norm=norm)
-  return MasterListOfGenomes
-
-def write_size_distribution_dataframe(readDict, size_distribution_file):
-  with open(size_distribution_file, 'w') as size_distrib:
-    print >>size_distrib, "gene\tpolarity\tsize\tcount\tsample"
-    for sample in readDict.keys():
-      if args.gff:
-        dict=readDict[sample]
-      else:
-        dict=readDict[sample].instanceDict
-      for gene in dict.keys():
-        histogram = dict[gene].size_histogram()
-        for polarity in histogram.keys():
-          for item in histogram[polarity].iteritems():
-            print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, polarity, item[0], item[1], sample)
-
-def write_size_distribution_dataframe_global(readDict, size_distribution_file, pol=["both"]):
-  with open(size_distribution_file, 'w') as size_distrib:
-    print >>size_distrib, "gene\tpolarity\tsize\tcount\tsample"
-    for sample in readDict.keys():
-      histogram = readDict[sample].size_histogram()
-      gene="sample"
-      for polarity in pol:
-        for item in histogram[polarity].iteritems():
-          if polarity=="R":
-            print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, polarity, item[0], -item[1], sample)
-          else:
-            print >>size_distrib, "%s\t%s\t%s\t%s\t%s" % (gene, polarity, item[0], item[1], sample)
-
-def gff_item_subinstances(readDict, gff3):
-  GFFinstanceDict=OrderedDict()
-  with open(gff3) as gff:
-    for line in gff:
-      if line[0] == "#": continue
-      gff_fields = line[:-1].split("\t")
-      chrom = gff_fields[0]
-      gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
-      item_upstream_coordinate = int(gff_fields[3])
-      item_downstream_coordinate = int(gff_fields[4])
-      item_polarity = gff_fields[6]
-      for sample in readDict.keys():
-        if not GFFinstanceDict.has_key(sample):
-          GFFinstanceDict[sample]={}
-        subinstance=extractsubinstance(item_upstream_coordinate, item_downstream_coordinate, readDict[sample].instanceDict[chrom])
-        if item_polarity == '-':
-          subinstance.readDict={key*-1:value for key, value in subinstance.readDict.iteritems()}
-#          subinstance.readDict.setdefault(key, [])
-        subinstance.gene=gff_name
-        GFFinstanceDict[sample][gff_name]=subinstance
-  return GFFinstanceDict
-
-MasterListOfGenomes=process_samples(filePath)
-
-if args.gff:
-  MasterListOfGenomes=gff_item_subinstances(MasterListOfGenomes, args.gff)
-
-if global_size:
-  write_size_distribution_dataframe_global(MasterListOfGenomes, size_distribution_file, pol)
-else:
-  write_size_distribution_dataframe(MasterListOfGenomes, size_distribution_file)
-
-R_command="Rscript "+ Rcode
-process = subprocess.Popen(R_command.split())
-process.wait()
-	
--- a/mississippi_gcc/size_histogram.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,200 +0,0 @@
-<tool id="Size_histogram" name="Generate size histograms from alignment files" version="0.9.0">
-  <description>from sRbowtie aligment</description>
-  <requirements><requirement type='package'>bowtie-inspect</requirement></requirements>
-  <parallelism method="basic"></parallelism>
-<command interpreter="python">
-        size_histogram.py 
-	          #if $refGenomeSource.genomeSource == "history":
-         	    --reference_fasta  ## sys.argv[2]
-                    $refGenomeSource.ownFile ## index source
-          	  #else:
-                    #silent reference= filter( lambda x: str( x[0] ) == str( $refGenomeSource.series[0].input.dbkey ), $__app__.tool_data_tables[ 'bowtie_indexes' ].get_fields() )[0][-1]
-		    --reference_bowtie_index
-                    $reference
-          	  #end if
-		  --rcode
-		  $plotCode
-		  --output_size_distribution
-		  $size_distribution_dataframe
-		  --minquery
-		  $minquery
-		  --maxquery
-		  $maxquery
-		  --input
-		  #for $i in $refGenomeSource.series
-    		    $i.input 
-		  #end for
-		  --ext
-		  #for $i in $refGenomeSource.series
-    		    $i.input.ext 
-		  #end for
-		  --label
-		  #for $i in $refGenomeSource.series
-    		    "$i.input.name" 
-		  #end for
-		  --normalization_factor
-		  #for $i in $refGenomeSource.series
-    		    $i.norm
-		  #end for
-		  #if $gff:
-		    --gff
-                    $gff
-                  #end if
-                  #if $global.value == 'yes':
-                    --global_size
-                  #end if
-                  #if $collapsestrands.value == 'yes':
-                    --collapse
-                  #end if
-
-</command>
-  <inputs>
-       <conditional name="refGenomeSource">
-           <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
-               <option value="indexed">Use a built-in index</option>
-               <option value="history">Use one from the history</option>
-           </param>
-           <when value="indexed">
-	     <repeat name="series" title="Add alignment files">
-	       <param name="input" type="data" label="Select multiple alignments to parse">
-                  <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="database not set for this bowtie output. Select the database(=genome used for matching) manually, or select a reference fasta from your history."/>
-               </param>
-	       <param name="norm" type="float" value="1" label="Indicate a normalization factor to compare multiple aligments"/>
-	     </repeat>
-           </when>
-           <when value="history">
-	     <repeat name="series" title="Add alignment files">
-	       <param name="input" type="data" label="Select multiple alignments to parse"/>
-	       <param name="norm" type="integer" value="1" label="Indicate a normalization factor to compare multiple aligments"/>
-	     </repeat>
-	   </when>
-       </conditional>
-                <param name="gff" type="data" optional="true" label="Optional: select a GFF to investigate regions of interest" help="GFF must match genome build"/>
-                 <!-- <validator type="dataset_metadata_in_data_table" table_name="bowtie_indexes" metadata_name="dbkey" metadata_column="0" message="GFF database and alignment file databse do not match!"/> -->
-		<param name="global" type="select" label="Generate size distribution for each item, or generate a global alignment">
-                  <option value="no">for each item</option>
-                  <option value="yes">global</option>
-                </param>
-                <param name="collapsestrands" type="select" label="Whether + and - reads should be collapsed or not">
-                  <option value="no">Do not collapse</option>
-                  <option value="yes">Collapse + and - reads</option>
-                </param>
-                <param name="minquery" type="integer" size="3" value="18" label="Min size of reads to plot" help="'15' = 15 nucleotides"/>
-                <param name="maxquery" type="integer" size="3" value="28" label="Max size of reads to plot" help="'30' = 30 nucleotides"/>
-                <param name="title" type="text" size="15" value="Size distribution" label="Main Titles"/>
-                <param name="xlabel" type="text" size="15" value="Size in nucleotides" label="x axis label"/>
-                <param name="ylabel" type="text" size="15" value="Number of reads" label="y axis label"/>
-                <param name="rows_per_page" type="text" size="9" value="8" label="How many items to display per page?">
-                  <validator type="in_range" min="6" max="20" message="Select between 6 and 20 rows, as the readability will suffer otherwise."/>
-		</param>
-  </inputs>
-   <configfiles>
-     <configfile name="plotCode">
-      ## Setup R error handling to go to stderr
-      options( show.error.messages=F,
-               error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-      library(RColorBrewer)
-      library(lattice)
-      library(latticeExtra)
-      library(grid)
-      library(gridExtra)
-      ##cheetahtemplate data frame implementation
-
-      size=read.delim("${size_distribution_dataframe}", header=T, row.names=NULL)
-
-      n_samples=length(unique(size\$sample))
-      genes=unique(levels(size\$gene))
-      n_genes=length(genes)
-
-      par.settings.size=list(layout.heights=list(top.padding=-1, bottom.padding=-3, strip = .75), fontsize = list(text=96/${rows_per_page}, points=8))
-      smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
-
-      plot_size_distribution= function(df, ...) {
-         bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample))+gene, data = df, origin = 0,
-          horizontal=FALSE,
-	  group=polarity,
-	  stack=TRUE,
-          col=c('red', 'blue'),
-          strip = strip.custom(par.strip.text = list(cex = 0.5)),
-          cex=0.75,
-          scales=list(y=list(tick.number=4, rot=90, relation="free"), cex=0.75),
-          xlab = "readsize in nucleotides",
-          ylab = "${ylabel}",
-          main="${title}" ,
-          as.table=TRUE, newpage = T, ...)
-          combineLimits(update(useOuterStrips(bc), layout=c(n_samples,${rows_per_page})), margin.x=F, margin.y=1)
-          }
-
-      per_gene_size=lapply(genes, function(x) subset(size, gene==x))
-
-      global = "no"
-      #if $global.value == 'yes':
-        global = "yes"
-      #end if
-
-      if (global=="no") {
-      options(warn=-1)
-      pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677*n_samples/4)
-      plot_size_distribution(size, par.settings=par.settings.size, prepanel=smR.prepanel)
-      } else {
-      pdf(file="${size_PDF}", paper="special", height=11.69, width=8.2677)
-          bc= barchart(count~as.factor(size)|factor(sample, levels=unique(sample)), data = size, origin = 0,
-          horizontal=FALSE,
-	  group=polarity,
-	  stack=TRUE,
-          col=c('red', 'blue'),
-          cex=0.75,
-	  par.settings=list(fontsize = list(text=8, points=8)),
-          scales=list(y=list(tick.number=4, rot=90, relation="same"), cex=0.75),
-          xlab = "readsize in nucleotides",
-          ylab = "${ylabel}",
-          main="${title}" , as.table=TRUE, newpage = T,
-          aspect=0.5)
-          #layout=c(n_samples, ${rows_per_page}))
-          bc
-      }
-      devname=dev.off()
-
-     </configfile>
-   </configfiles>
-
-   <outputs>
-   <data format="tabular" name="size_distribution_dataframe" label="Size distributionn dataframe"/>
-   <data format="pdf" name="size_PDF" label="Size distribution"/>
-   </outputs>
-<help>
-
-**What it does**
-
-Takes one or more alignment files (BAM, SAM or tabular bowtie output) as input and produces a histogram of read sizes;
-by default one histogram is drawn per "chromosome".
-Reads that map in sense are on the top (red); reads that map antisense are on the bottom (blue).
-
-
-.. class:: warningmark
-
-'''TIP''' The input data can be produced using the sRbowtie tool.
-
-----
-
-'''Example'''
-
-For a SAM input like the following::
-
-  5	16	2L_79	24393	255	17M	*	0	0	CCTTCATCTTTTTTTTT	IIIIIIIIIIIIIIIII	XA:i:0	MD:Z:17	NM:i:0
-
-  11	0	2R_1	12675	255	21M	*	0	0	AAAAAAAACGCGTCCTTGTGC	IIIIIIIIIIIIIIIIIIIII	XA:i:0	MD:Z:21	NM:i:0
-
-  2	16	2L_5	669	255	23M	*	0	0	TGTTGCTGCATTTCTTTTTTTTT	IIIIIIIIIIIIIIIIIIIIIII	XA:i:0	MD:Z:23	NM:i:0
-
-produces a plot like this:
-
-----
-
-.. image:: static/images/size_histogram.png 
-    :height: 800 
-    :width: 500
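-
-The counting behind each histogram bar can be sketched as follows (illustrative only, using the three SAM records above)::
-
-    from collections import defaultdict
-    hist = {"F": defaultdict(int), "R": defaultdict(int)}
-    for flag, seq in [(16, "CCTTCATCTTTTTTTTT"),
-                      (0,  "AAAAAAAACGCGTCCTTGTGC"),
-                      (16, "TGTTGCTGCATTTCTTTTTTTTT")]:
-        polarity = "R" if flag & 16 else "F"  # SAM flag bit 0x10 marks reverse-strand reads
-        hist[polarity][len(seq)] += 1
-    # hist["F"] == {21: 1}; hist["R"] == {17: 1, 23: 1}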
-
-</help>
-</tool>
--- a/mississippi_gcc/smRtools.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,704 +0,0 @@
-#!/usr/bin/python
-# version 1 7-5-2012 unification of the SmRNAwindow class
-
-import sys, subprocess
-from collections import defaultdict
-from numpy import mean, median, std
-from scipy import stats
-
-def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):
-  '''This function will return a dictionary containing fasta identifiers as keys and the
-  sequence as values. Index must be the path to a fasta file.'''
-  p = subprocess.Popen(args=["bowtie-inspect","-a", "0", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines
-  outputlines = p.stdout.readlines()
-  p.wait()
-  item_dic = {}
-  for line in outputlines:
-    if (line[0] == ">"):
-      try:
-        item_dic[current_item] = "".join(stringlist) # dump the sequence of the previous item; try/except because current_item is not yet defined for the first header
-      except: pass
-      current_item = line[1:].rstrip().split()[0] #take the first word before space because bowtie splits headers !
-      item_dic[current_item] = ""
-      stringlist=[]
-    else:
-      stringlist.append(line.rstrip() )
-  item_dic[current_item] = "".join(stringlist) # for the last item
-  return item_dic
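-# example usage (hypothetical index path): get_fasta("/data/indexes/dm3") -> {"2L": "CGACAATG...", ...}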
-
-def get_fasta_headers (index):
-  p = subprocess.Popen(args=["bowtie-inspect","-n", index], stdout=subprocess.PIPE, stderr=subprocess.STDOUT) # bowtie-inspect outputs sequences on single lines
-  outputlines = p.stdout.readlines()
-  p.wait()
-  item_dic = {}
-  for line in outputlines:
-    header = line.rstrip().split()[0] #take the first word before space because bowtie splits headers !
-    item_dic[header] = 1
-  return item_dic
-
-
-def get_file_sample (file, numberoflines):
-  '''return a random sample of numberoflines lines from file'''
-  import random # local import: random is not imported at module level
-  F=open(file)
-  fullfile = F.read().splitlines()
-  F.close()
-  if len(fullfile) < numberoflines:
-    return "sample size exceeds file size"
-  return random.sample(fullfile, numberoflines)
-
-def get_fasta_from_history (file):
-  F = open (file, "r")
-  item_dic = {}
-  for line in F:
-    if (line[0] == ">"):
-      try:
-        item_dic[current_item] = "".join(stringlist) # dump the sequence of the previous item; try/except because current_item is not yet defined for the first header
-      except: pass
-      current_item = line[1:-1].split()[0] #take the first word before space because bowtie splits headers !
-      item_dic[current_item] = ""
-      stringlist=[]
-    else:
-      stringlist.append(line[:-1])
-  item_dic[current_item] = "".join(stringlist) # for the last item
-  return item_dic
-
-def antipara (sequence):
-    antidict = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}
-    revseq = sequence[::-1]
-    return "".join([antidict[i] for i in revseq])
-
-def RNAtranslate (sequence):
-    return "".join([i if i in "AGCN" else "U" for i in sequence])
-def DNAtranslate (sequence):
-    return "".join([i if i in "AGCN" else "T" for i in sequence])
-
-def RNAfold (sequence_list):
-  thestring= "\n".join(sequence_list)
-  p = subprocess.Popen(args=["RNAfold","--noPS"], stdin= subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
-  output=p.communicate(thestring)[0]
-  p.wait()
-  output=output.split("\n")
-  if not output[-1]: output = output[:-1] # nasty patch to remove last empty line
-  buffer=[]
-  for line in output:
-    if line[0] in ["N","A","T","U","G","C"]:
-      buffer.append(DNAtranslate(line))
-    if line[0] in ["(",".",")"]:
-      fields=line.split("(")
-      energy= fields[-1]
-      energy = energy[:-1] # remove the ) parenthesis
-      energy=float(energy)
-      buffer.append(str(energy))
-  return dict(zip(buffer[::2], buffer[1::2]))
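-# example (requires the RNAfold binary on the PATH; the energy value is illustrative):
-# RNAfold(["GCGCTTCGCC"]) -> {"GCGCTTCGCC": "-3.1"}, i.e. sequence -> folding energy as a string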
-
-def extractsubinstance (start, end, instance):
-  ''' Testing whether this can be an function external to the class to save memory'''
-  subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)
-  subinstance.gene = "%s %s %s" % (subinstance.gene, subinstance.windowoffset, subinstance.windowoffset + subinstance.size - 1)
-  upcoordinate = [i for i in range(start,end+1) if instance.readDict.has_key(i) ]
-  downcoordinate = [-i for i in range(start,end+1) if instance.readDict.has_key(-i) ]
-  for i in upcoordinate:
-    subinstance.readDict[i]=instance.readDict[i]
-  for i in downcoordinate:
-    subinstance.readDict[i]=instance.readDict[i]
-  return subinstance
-
-class HandleSmRNAwindows:
-  def __init__(self, alignmentFile="~", alignmentFileFormat="tabular", genomeRefFile="~", genomeRefFormat="bowtieIndex", biosample="undetermined", size_inf=None, size_sup=1000, norm=1.0):
-    self.biosample = biosample
-    self.alignmentFile = alignmentFile
-    self.alignmentFileFormat = alignmentFileFormat # can be "tabular" or "sam"
-    self.genomeRefFile = genomeRefFile
-    self.genomeRefFormat = genomeRefFormat # can be "bowtieIndex" or "fastaSource"
-    self.alignedReads = 0
-    self.instanceDict = {}
-    self.size_inf=size_inf
-    self.size_sup=size_sup
-    self.norm=norm
-    if genomeRefFormat == "bowtieIndex":
-      self.itemDict = get_fasta (genomeRefFile)
-    elif genomeRefFormat == "fastaSource":
-      self.itemDict = get_fasta_from_history (genomeRefFile)
-    for item in self.itemDict:
-      self.instanceDict[item] = SmRNAwindow(item, sequence=self.itemDict[item], windowoffset=1, biosample=self.biosample, norm=self.norm) # create as many instances as there is items
-    self.readfile()
-
-  def readfile (self) :
-    if self.alignmentFileFormat == "tabular":
-      F = open (self.alignmentFile, "r")
-      for line in F:
-        fields = line.split()
-        polarity = fields[1]
-        gene = fields[2]
-        offset = int(fields[3])
-        size = len (fields[4])
-        if self.size_inf:
-          if (size>=self.size_inf and size<= self.size_sup):
-            self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow
-            self.alignedReads += 1
-        else:
-          self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow
-          self.alignedReads += 1
-      F.close()
-      return self.instanceDict
-#    elif self.alignmentFileFormat == "sam":
-#      F = open (self.alignmentFile, "r")
-#      dict = {"0":"+", "16":"-"}
-#      for line in F:
-#        if line[0]=='@':
-#            continue
-#        fields = line.split()
-#        if fields[2] == "*": continue
-#        polarity = dict[fields[1]]
-#        gene = fields[2]
-#        offset = int(fields[3])
-#        size = len (fields[9])
-#        if self.size_inf:
-#          if (size>=self.size_inf and size<= self.size_sup):
-#            self.instanceDict[gene].addread (polarity, offset, size)
-#            self.alignedReads += 1
-#       else:
-#          self.instanceDict[gene].addread (polarity, offset, size)
-#          self.alignedReads += 1
-#      F.close()
-    elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam":
-      import pysam
-      samfile = pysam.Samfile(self.alignmentFile)
-      for read in samfile:
-        if read.tid == -1:
-          continue # filter out unaligned reads
-        if read.is_reverse:
-          polarity="-"
-        else:
-          polarity="+"
-        gene = samfile.getrname(read.tid)
-        offset = read.pos
-        size = read.qlen
-        if self.size_inf:
-          if (size>=self.size_inf and size<= self.size_sup):
-            self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow
-            self.alignedReads += 1
-        else:
-          self.instanceDict[gene].addread (polarity, offset+1, size) # to correct to 1-based coordinates of SmRNAwindow
-          self.alignedReads += 1
-      return self.instanceDict
-
-  def size_histogram (self):
-    size_dict={}
-    size_dict['F']= defaultdict (int)
-    size_dict['R']= defaultdict (int)
-    size_dict['both'] = defaultdict (int)
-    for item in self.instanceDict:
-      buffer_dict_F = self.instanceDict[item].size_histogram()['F']
-      buffer_dict_R = self.instanceDict[item].size_histogram()['R']
-      for size in buffer_dict_F:
-        size_dict['F'][size] += buffer_dict_F[size]
-      for size in buffer_dict_R:
-        size_dict['R'][size] -= buffer_dict_R[size]
-    allSizeKeys = list (set (size_dict['F'].keys() + size_dict['R'].keys() ) )
-    for size in allSizeKeys:
-      size_dict['both'][size] = size_dict['F'][size] + size_dict['R'][size]
-    return size_dict
-
-  def CountFeatures (self, GFF3="path/to/file"):
-    featureDict = defaultdict(int)
-    F  = open (GFF3, "r")
-    for line in F:
-      if line[0] ==  "#": continue
-      fields = line[:-1].split()
-      chrom, feature, leftcoord, rightcoord, polarity = fields[0], fields[2], fields[3], fields[4], fields[6]
-      featureDict[feature] += self.instanceDict[chrom].readcount(upstream_coord=int(leftcoord), downstream_coord=int(rightcoord), polarity="both", method="destructive")
-    F.close()
-    return featureDict
-
-class SmRNAwindow:
-
-  def __init__(self, gene, sequence="ATGC", windowoffset=1, biosample="Undetermined", norm=1.0):
-    self.biosample = biosample
-    self.sequence = sequence
-    self.gene = gene
-    self.windowoffset = windowoffset
-    self.size = len(sequence)
-    self.readDict = defaultdict(list) # with a {+/-offset:[size1, size2, ...], ...}
-    self.matchedreadsUp = 0
-    self.matchedreadsDown = 0
-    self.norm=norm
-    
-  def addread (self, polarity, offset, size):
-    '''ATTENTION: the conversion from 0- to 1-based offsets was removed here; it is now done during read parsing.'''
-    if polarity == "+":
-      self.readDict[offset].append(size)
-      self.matchedreadsUp += 1
-    else:
-      self.readDict[-(offset + size -1)].append(size)
-      self.matchedreadsDown += 1
-    return
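-    # example: addread("-", 100, 21) stores size 21 under key -(100+21-1) = -120,
-    # i.e. reverse reads are keyed by the (negative) coordinate of their 5' end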
-
-  def barycenter (self, upstream_coord=None, downstream_coord=None):
-    '''refactored 24-12-2013 to save memory and introduce offset filtering (see the readcount method for further discussion).
-    In this version, an attempt is made to replace the dictionary structure by a list of tuples, also to save memory'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    window_size = downstream_coord - upstream_coord +1
-    def weightAverage (TuppleList):
-      weightSum = 0
-      PonderWeightSum = 0
-      for tuple in TuppleList:
-        PonderWeightSum += tuple[0] * tuple[1]
-        weightSum += tuple[1]
-      if weightSum > 0:
-        return PonderWeightSum / float(weightSum)
-      else:
-        return 0
-    forwardTuppleList = [(k, len(self.readDict[k])) for k in self.readDict.keys() if (k > 0 and abs(k) >= upstream_coord and abs(k) <= downstream_coord)] # both forward and in the proper offset window
-    reverseTuppleList = [(-k, len(self.readDict[k])) for k in self.readDict.keys() if (k < 0 and abs(k) >= upstream_coord and abs(k) <= downstream_coord)] # both reverse and in the proper offset window
-    Fbarycenter = (weightAverage (forwardTuppleList) - upstream_coord) / window_size
-    Rbarycenter = (weightAverage (reverseTuppleList) - upstream_coord) / window_size
-    return Fbarycenter, Rbarycenter
-
-  def correlation_mapper (self, reference, window_size):
-    '''to map correlation with a sliding window 26-2-2013'''
-    if window_size > self.size:
-      return []
-    F=open(reference, "r")
-    reference_forward = []
-    reference_reverse = []
-    for line in F:
-      fields=line.split()
-      reference_forward.append(int(float(fields[1])))
-      reference_reverse.append(int(float(fields[2])))
-    F.close()
-    local_object_forward=[]
-    local_object_reverse=[]
-    ## Dict to list for the local object
-    for i in range(1, self.size+1):
-      local_object_forward.append(len(self.readDict[i]))
-      local_object_reverse.append(len(self.readDict[-i]))
-    ## start compiling results by slides
-    results=[]
-    for coordinate in range(self.size - window_size):
-      local_forward=local_object_forward[coordinate:coordinate + window_size]
-      local_reverse=local_object_reverse[coordinate:coordinate + window_size]
-      if sum(local_forward) == 0 or sum(local_reverse) == 0: 
-        continue
-      try:
-        reference_to_local_cor_forward = stats.spearmanr(local_forward, reference_forward)
-        reference_to_local_cor_reverse = stats.spearmanr(local_reverse, reference_reverse)
-        if (reference_to_local_cor_forward[0] > 0.2 or  reference_to_local_cor_reverse[0]>0.2):
-          results.append([coordinate+1, reference_to_local_cor_forward[0], reference_to_local_cor_reverse[0]])
-      except Exception:
-        continue
-    return results
-
-  def readcount (self, size_inf=0, size_sup=1000, upstream_coord=None, downstream_coord=None, polarity="both", method="conservative"):
-    '''refactored 24-12-2013 to save memory and introduce offset filtering.
-    Note that the default coordinates cannot be defined relative to the instance, as they are evaluated before instantiation;
-    the trick is to pass None and then test.
-    The polarity parameter can take "both", "forward" or "reverse" as value'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    if upstream_coord == self.windowoffset and downstream_coord == self.windowoffset+self.size-1 and size_inf == 0 and size_sup == 1000:
-      if polarity == "both":
-        return self.matchedreadsUp + self.matchedreadsDown
-      if polarity == "forward":
-        return self.matchedreadsUp
-      if polarity == "reverse":
-        return self.matchedreadsDown
-    n = 0
-    if polarity == "both":
-      for offset in xrange(upstream_coord, downstream_coord+1):
-        if offset in self.readDict:
-          for read in self.readDict[offset]:
-            if (read >= size_inf and read <= size_sup):
-              n += 1
-          if method != "conservative":
-            del self.readDict[offset] ## Careful! this precludes re-use of the self.readDict dictionary
-        if -offset in self.readDict:
-          for read in self.readDict[-offset]:
-            if (read >= size_inf and read <= size_sup):
-              n += 1
-          if method != "conservative":
-            del self.readDict[-offset]
-      return n
-    elif polarity == "forward":
-      for offset in xrange(upstream_coord, downstream_coord+1):
-        if offset in self.readDict:
-          for read in self.readDict[offset]:
-            if (read >= size_inf and read <= size_sup):
-              n += 1
-      return n
-    elif polarity == "reverse":
-      for offset in xrange(upstream_coord, downstream_coord+1):
-        if -offset in self.readDict:
-          for read in self.readDict[-offset]:
-            if (read >= size_inf and read <= size_sup):
-              n += 1
-      return n
-
-  def readsizes (self):
-    '''return a dictionary of read counts keyed by read size'''
-    dicsize = {}
-    for offset in self.readDict:
-      for size in self.readDict[offset]:
-        dicsize[size] = dicsize.get(size, 0) + 1
-    for size in range (min(dicsize.keys()), max(dicsize.keys())+1):
-      dicsize[size] = dicsize.get(size, 0) # fill missing sizes with null values
-    return dicsize
-    
-  def size_histogram(self):
-    norm=self.norm
-    hist_dict={}
-    hist_dict['F']={}
-    hist_dict['R']={}
-    for offset in self.readDict:
-      for size in self.readDict[offset]:
-        if offset < 0:
-          hist_dict['R'][size] = hist_dict['R'].get(size, 0) - norm
-        else:
-          hist_dict['F'][size] = hist_dict['F'].get(size, 0) + norm
-    return hist_dict
-
-  def statsizes (self, upstream_coord=None, downstream_coord=None):
-    ''' migration to memory saving by specifying possible subcoordinates;
-    see the readcount method for further discussion'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    L = []
-    for offset in self.readDict:
-      if (abs(offset) < upstream_coord or abs(offset) > downstream_coord): continue
-      for size in self.readDict[offset]:
-        L.append(size)
-    meansize = mean(L)
-    stdv = std(L)
-    mediansize = median(L)         
-    return meansize, mediansize, stdv
-
-  def foldEnergy (self, upstream_coord=None, downstream_coord=None):
-    ''' migration to memory saving by specifying possible subcoordinates;
-    see the readcount method for further discussion'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    Energy = RNAfold ([self.sequence[upstream_coord-1:downstream_coord] ])
-    return float(Energy[self.sequence[upstream_coord-1:downstream_coord]])
-
-  def Ufreq (self, size_scope, upstream_coord=None, downstream_coord=None):
-    ''' migration to memory saving by specifying possible subcoordinates;
-    see the readcount method for further discussion. size_scope must be an iterable'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    freqDic = {"A":0,"T":0,"G":0,"C":0, "N":0}
-    convertDic = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
-    for offset in self.readDict:
-      if (abs(offset) < upstream_coord or abs(offset) > downstream_coord): continue
-      for size in self.readDict[offset]:
-        if size in size_scope:
-          startbase = self.sequence[abs(offset)-self.windowoffset]
-          if offset < 0:
-            startbase = convertDic[startbase]
-          freqDic[startbase] += 1
-    base_sum = float ( sum( freqDic.values()) )
-    if base_sum == 0:
-      return "."
-    else:
-      return freqDic["T"] / base_sum * 100
-
-  def Ufreq_stranded (self, size_scope, upstream_coord=None, downstream_coord=None):
-    ''' migration to memory saving by specifying possible subcoordinates;
-    see the readcount method for further discussion. size_scope must be an iterable.
-    This method is similar to the Ufreq method but takes strandedness into account'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    freqDic = {"Afor":0,"Tfor":0,"Gfor":0,"Cfor":0, "Nfor":0,"Arev":0,"Trev":0,"Grev":0,"Crev":0, "Nrev":0}
-    convertDic = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
-    for offset in self.readDict:
-      if (abs(offset) < upstream_coord or abs(offset) > downstream_coord): continue
-      for size in self.readDict[offset]:
-        if size in size_scope:
-          startbase = self.sequence[abs(offset)-self.windowoffset]
-          if offset < 0:
-            startbase = convertDic[startbase]
-            freqDic[startbase+"rev"] += 1
-          else:
-            freqDic[startbase+"for"] += 1
-    forward_sum = float ( freqDic["Afor"]+freqDic["Tfor"]+freqDic["Gfor"]+freqDic["Cfor"]+freqDic["Nfor"])
-    reverse_sum = float ( freqDic["Arev"]+freqDic["Trev"]+freqDic["Grev"]+freqDic["Crev"]+freqDic["Nrev"])
-    if forward_sum == 0 and reverse_sum == 0:
-      return ". | ."
-    elif reverse_sum == 0:
-      return "%s | ." % (freqDic["Tfor"] / forward_sum * 100)
-    elif forward_sum == 0:
-      return ". | %s" % (freqDic["Trev"] / reverse_sum * 100)
-    else:
-      return "%s | %s" % (freqDic["Tfor"] / forward_sum * 100, freqDic["Trev"] / reverse_sum * 100)
-
-    
-  def readplot (self):
-    norm=self.norm
-    readmap = {}
-    for offset in self.readDict.keys():
-      readmap[abs(offset)] = ( len(self.readDict.get(-abs(offset),[]))*norm , len(self.readDict.get(abs(offset),[]))*norm )
-    mylist = []
-    for offset in sorted(readmap):
-      if readmap[offset][1] != 0:
-        mylist.append("%s\t%s\t%s\t%s" % (self.gene, offset, readmap[offset][1], "F") )
-      if readmap[offset][0] != 0:
-        mylist.append("%s\t%s\t%s\t%s" % (self.gene, offset, -readmap[offset][0], "R") )
-    return mylist
-
-  def readcoverage (self, upstream_coord=None, downstream_coord=None, windowName=None):
-    '''Used by the MirParser tool'''
-    upstream_coord = upstream_coord or 1
-    downstream_coord = downstream_coord or self.size
-    windowName = windowName or "%s_%s_%s" % (self.gene, upstream_coord, downstream_coord)
-    forORrev_coverage = dict ([(i,0) for i in xrange(1, downstream_coord-upstream_coord+2)]) # coverage positions 1..window_length
-    totalforward = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="forward")
-    totalreverse = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="reverse")
-    if totalforward > totalreverse:
-      majorcoverage = "forward"
-      for offset in self.readDict.keys():
-        if (offset > 0) and ((offset-upstream_coord+1) in forORrev_coverage.keys() ):
-          for read in self.readDict[offset]:
-            for i in xrange(read):
-              try:
-                forORrev_coverage[offset-upstream_coord+1+i] += 1
-              except KeyError:
-                continue # a sense read may span over the downstream limit
-    else:
-      majorcoverage = "reverse"
-      for offset in self.readDict.keys():
-        if (offset < 0) and (-offset-upstream_coord+1 in forORrev_coverage.keys() ):
-          for read in self.readDict[offset]:
-            for i in xrange(read):
-              try:
-                forORrev_coverage[-offset-upstream_coord-i] += 1 ## positive coordinates in the instance, with + for forward coverage and - for reverse coverage
-              except KeyError:
-                continue # an antisense read may span over the upstream limit
-    output_list = []
-    maximum = max (forORrev_coverage.values()) or 1
-    for n in sorted (forORrev_coverage):
-      output_list.append("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.biosample, windowName, n, float(n)/(downstream_coord-upstream_coord+1), forORrev_coverage[n], float(forORrev_coverage[n])/maximum, majorcoverage))
-    return "\n".join(output_list)
-
-          
-  def signature (self, minquery, maxquery, mintarget, maxtarget, scope, zscore="no", upstream_coord=None, downstream_coord=None):
-    ''' migration to memory saving by specifying possible subcoordinates;
-    see the readcount method for further discussion.
-    scope must be a python iterable; it defines the *relative* offset range to be computed'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    query_range = range (minquery, maxquery+1)
-    target_range = range (mintarget, maxtarget+1)
-    Query_table = {}
-    Target_table = {}
-    frequency_table = dict ([(i, 0) for i in scope])
-    for offset in self.readDict:
-      if (abs(offset) < upstream_coord or abs(offset) > downstream_coord): continue
-      for size in self.readDict[offset]:
-        if size in query_range:
-          Query_table[offset] = Query_table.get(offset, 0) + 1
-        if size in target_range:
-          Target_table[offset] = Target_table.get(offset, 0) + 1
-    for offset in Query_table:
-      for i in scope:
-        frequency_table[i] += min(Query_table[offset], Target_table.get(-offset -i +1, 0))
-    if minquery==mintarget and maxquery==maxtarget: ## added to incorporate the division by 2 in the method (26/11/2013), see signature_options.py and lattice_signature.py
-      frequency_table = dict([(i,frequency_table[i]/2) for i in frequency_table])
-    if zscore == "yes":
-      z_mean = mean(frequency_table.values() )
-      z_std = std(frequency_table.values() )
-      if z_std == 0:
-        frequency_table = dict([(i,0) for i in frequency_table] )
-      else:
-        frequency_table = dict([(i, (frequency_table[i]- z_mean)/z_std) for i in frequency_table] )    
-    return frequency_table
-
-  def hannon_signature (self, minquery, maxquery, mintarget, maxtarget, scope, upstream_coord=None, downstream_coord=None):
-    ''' migration to memory saving by specifying possible subcoordinates; see the readcount method for further discussion.
-    Note that scope must be an iterable (a list or a tuple) specifying the relative offsets that will be computed'''
-    upstream_coord = upstream_coord or self.windowoffset
-    downstream_coord = downstream_coord or self.windowoffset+self.size-1
-    query_range = range (minquery, maxquery+1)
-    target_range = range (mintarget, maxtarget+1)
-    Query_table = {}
-    Target_table = {}
-    Total_Query_Numb = 0
-    general_frequency_table = dict ([(i,0) for i in scope])
-    ## filtering the appropriate reads for the study
-    for offset in self.readDict:
-      if (abs(offset) < upstream_coord or abs(offset) > downstream_coord): continue
-      for size in self.readDict[offset]:
-        if size in query_range:
-          Query_table[offset] = Query_table.get(offset, 0) + 1
-          Total_Query_Numb += 1
-        if size in target_range:
-          Target_table[offset] = Target_table.get(offset, 0) + 1
-    for offset in Query_table:
-      frequency_table = dict ([(i,0) for i in scope])
-      number_of_targets = 0
-      for i in scope:
-        frequency_table[i] += Query_table[offset] *  Target_table.get(-offset -i +1, 0)
-        number_of_targets += Target_table.get(-offset -i +1, 0)
-      for i in scope:
-        try:
-          general_frequency_table[i] += (1. / number_of_targets / Total_Query_Numb) * frequency_table[i]
-        except ZeroDivisionError :
-          continue
-    return general_frequency_table      
-
-  def phasing (self, size_range, scope):
-    ''' to calculate an autocorrelation-like signal; scope must be a python iterable'''
-    read_table = {}
-    total_read_number = 0
-    general_frequency_table = dict ([(i, 0) for i in scope])
-    ## read input filtering
-    for offset in self.readDict:
-      for size in self.readDict[offset]:
-        if size in size_range:
-          read_table[offset] = read_table.get(offset, 0) + 1
-          total_read_number += 1
-    ## per offset read phasing computing
-    for offset in read_table:
-      frequency_table = dict ([(i, 0) for i in scope]) # local frequency table
-      number_of_targets = 0
-      for i in scope:
-        if offset > 0:
-          frequency_table[i] += read_table[offset] *  read_table.get(offset + i, 0)
-          number_of_targets += read_table.get(offset + i, 0)
-        else:
-          frequency_table[i] += read_table[offset] *  read_table.get(offset - i, 0)
-          number_of_targets += read_table.get(offset - i, 0)
-      ## inclusion of the local frequency table in the general frequency table (average over all offsets)
-      for i in scope:
-        try:
-          general_frequency_table[i] += (1. / number_of_targets / total_read_number) * frequency_table[i]
-        except ZeroDivisionError :
-          continue
-    return general_frequency_table
-
-
-
-  def z_signature (self, minquery, maxquery, mintarget, maxtarget, scope):
-    '''requires numpy's mean and std in the namespace; scope must be a python iterable that defines the relative offsets to compute'''
-    frequency_table = self.signature (minquery, maxquery, mintarget, maxtarget, scope)
-    z_table = {}
-    frequency_list = [frequency_table[i] for i in sorted (frequency_table)]
-    if std(frequency_list):
-      meanlist = mean(frequency_list)
-      stdlist = std(frequency_list)
-      z_list = [(i-meanlist)/stdlist for i in frequency_list]
-      return dict (zip (sorted(frequency_table), z_list) ) 
-    else:
-      return dict (zip (sorted(frequency_table), [0 for i in frequency_table]) )
-
-  def percent_signature (self, minquery, maxquery, mintarget, maxtarget, scope):
-    frequency_table = self.signature (minquery, maxquery, mintarget, maxtarget, scope)
-    total = float(sum ([self.readsizes().get(i,0) for i in set(range(minquery,maxquery+1)+range(mintarget,maxtarget+1))]) ) # include maxquery and maxtarget, consistent with signature()
-    if total == 0:
-      return dict( [(i,0) for i in scope])
-    return dict( [(i, frequency_table[i]/total*100) for i in scope])
-
-  def pairer (self, overlap, minquery, maxquery, mintarget, maxtarget):
-    queryhash = defaultdict(list)
-    targethash = defaultdict(list)
-    query_range = range (int(minquery), int(maxquery)+1)
-    target_range = range (int(mintarget), int(maxtarget)+1)
-    paired_sequences = []
-    for offset in self.readDict: # selection of data
-      for size in self.readDict[offset]:
-        if size in query_range:
-          queryhash[offset].append(size)
-        if size in target_range:
-          targethash[offset].append(size)
-    for offset in queryhash:
-      matched_offset = -offset - overlap + 1 # identical expression for both polarities
-      if targethash[matched_offset]:
-        paired = min ( len(queryhash[offset]), len(targethash[matched_offset]) )
-        if offset >= 0:
-          for i in range (paired):
-            paired_sequences.append("+%s" % RNAtranslate ( self.sequence[offset:offset+queryhash[offset][i]]) )
-            paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-targethash[matched_offset][i]+1:-matched_offset+1]) ) )
-        if offset < 0:
-          for i in range (paired):
-            paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-queryhash[offset][i]+1:-offset+1]) ) )
-            paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+targethash[matched_offset][i]] ) )
-    return paired_sequences
-
-  def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget):
-    queryhash = defaultdict(list)
-    targethash = defaultdict(list)
-    query_range = range (int(minquery), int(maxquery)+1)
-    target_range = range (int(mintarget), int(maxtarget)+1)
-    paired_sequences = []
-
-    for offset in self.readDict: # selection of data
-      for size in self.readDict[offset]:
-        if size in query_range:
-          queryhash[offset].append(size)
-        if size in target_range:
-          targethash[offset].append(size)
-
-    for offset in queryhash:
-      matched_offset = -offset - overlap + 1
-      if targethash[matched_offset]:
-        if offset >= 0:
-          for i in queryhash[offset]:
-            paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )
-          for i in targethash[matched_offset]:
-            paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )
-        if offset < 0:
-          for i in queryhash[offset]:
-            paired_sequences.append("-%s" %  RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )
-          for i in targethash[matched_offset]:
-            paired_sequences.append("+%s" %  RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )
-    return paired_sequences
-
-  def newpairable_bowtie (self, overlap, minquery, maxquery, mintarget, maxtarget):
-    ''' revision of pairable on 3-12-2012, with focus on the offset shift problem (bowtie uses 1-based coordinates whereas python strings are 0-based)'''
-    queryhash = defaultdict(list)
-    targethash = defaultdict(list)
-    query_range = range (int(minquery), int(maxquery)+1)
-    target_range = range (int(mintarget), int(maxtarget)+1)
-    bowtie_output = []
-
-    for offset in self.readDict: # selection of data
-      for size in self.readDict[offset]:
-        if size in query_range:
-          queryhash[offset].append(size)
-        if size in target_range:
-          targethash[offset].append(size)
-    counter = 0
-    for offset in queryhash:
-      matched_offset = -offset - overlap + 1
-      if targethash[matched_offset]:
-        if offset >= 0:
-          for i in queryhash[offset]:
-            counter += 1
-            bowtie_output.append("%s\t%s\t%s\t%s\t%s" % (counter, "+", self.gene, offset-1, self.sequence[offset-1:offset-1+i]) ) # attention a la base 1-0 de l'offset 
-        if offset < 0:
-          for i in queryhash[offset]:
-            counter += 1
-            bowtie_output.append("%s\t%s\t%s\t%s\t%s" % (counter, "-", self.gene, -offset-i, self.sequence[-offset-i:-offset])) # attention a la base 1-0 de l'offset
-    return bowtie_output
-
-
-def __main__(bowtie_index_path, bowtie_output_path):
-  sequenceDic = get_fasta (bowtie_index_path)
-  objDic = {}
-  F = open (bowtie_output_path, "r") # F is the bowtie output taken as input
-  for line in F:
-    fields = line.split()
-    polarity = fields[1]
-    gene = fields[2]
-    offset = int(fields[3])
-    size = len (fields[4])
-    try:
-      objDic[gene].addread (polarity, offset, size)
-    except KeyError:
-      objDic[gene] = SmRNAwindow(gene, sequenceDic[gene])
-      objDic[gene].addread (polarity, offset, size)
-  F.close()
-  for gene in objDic:
-    print gene, objDic[gene].pairer(19,19,23,19,23)
-
-if __name__ == "__main__" : __main__(sys.argv[1], sys.argv[2])
--- a/mississippi_gcc/test-data/sRbowtie.fa	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
->1
-GTATTGTAAGTGGCAGAGTGGC
->2
-TGGAATGTAAAGAAGTATGGAG
->3
-GTGGGGAGTTTGGATGGGGCGGCA
->4
-AATGGCACTGGAAGAATTCACGG
->5
-GTACGGACAAGGGGAATC
->6
-TTGGGTTCTGGGGGGAGTATGG
->7
-GTGGGGAGTTTCGCTGGGGCGGCA
->8
-TAAGGGTCGGGTAGTGAGGGC
->9
-AGCTGGGACTGAGGACTG
->10
-AGCTGGGACTGAGGACTGC
->11
-AAGGGTCGGGTAGTGAGG
->12
-GTCGGGTAGTGAGGGCCTT
->13
-TGGTGGGGCTTGGAACAATTGGAGGGC
->14
-TGACGGAAGGGCACCACC
->15
-TGGAATGTAAAGAAGTATGGAG
->16
-TTGGGTTCTGGGGGGAGT
->17
-TCAGGTGGGGAGTTTGGCTGGGGCGGCACA
->18
-TTGGGTATAGGGGCGAAAGA
->19
-AGCGGGCGTGCTTGTGGAC
->20
-GTCGAATTTGGGTATAGGGGC
--- a/mississippi_gcc/test-data/sRbowtie.out	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-2	+	2L	20487495	TGGAATGTAAAGAAGTATGGAG
-4	-	2L	11953463	CCGTGAATTCTTCCAGTGCCATT
-15	+	2L	20487495	TGGAATGTAAAGAAGTATGGAG
-14	-	Uextra	7115665	GGTGGTGCCCTTCCGTCA
-18	+	Uextra	7726410	TTGGGTATAGGGGCGAAAGA
--- a/mississippi_gcc/test-data/yac.fastq	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-@SRR290479.1 HWI-EAS285:2:1:66:28/1
-TGTAAACATCCCCGACTGGCAGCATNTCGTATGCCG
-+
-B@BBCBCCBCBCCC8A<@##################
-@SRR290479.2 HWI-EAS285:2:1:67:348/1
-AAAGTGCTACTACTTTTGAGTCTATNTCGTACGCCG
-+
-BAA@7?A@@A@@B<'25?6>59:;7#<?########
-@SRR290479.3 HWI-EAS285:2:1:68:826/1
-TAGCTTATCAGACTGATGTTGACACNTCGTATGCCG
-+
-BB@BBCCBCCBBB:%%83/>B7@44#;;324'117?
-@SRR290479.4 HWI-EAS285:2:1:68:65/1
-ACTGGACTTGGAGTCCGAAGGCATCNCGTATTCCGT
-+
-BBB@@ABAAB?9B42&9;##################
-@SRR290479.5 HWI-EAS285:2:1:69:594/1
-AAGTGCCGCCAGGTTTTGAGTGGATNTCGTATGGCG
-+
-AB?5;3>/=?>=;416481#################
-@SRR290479.6 HWI-EAS285:2:1:70:700/1
-TATTGCACTTGTCCCGGCCTGAATCNCGTATCCCGT
-+
-BCB=:ACCBB=>BB8<-###################
-@SRR290479.7 HWI-EAS285:2:1:70:1679/1
-TGGTAGACTATGGAACGTAGGATCTNGCATGCCGCC
-+
-BCBBCCBCCCBCCA?AB>:B@><>############
-@SRR290479.8 HWI-EAS285:2:1:71:1400/1
-AGTGGTAGAGCATTTGAATCTCGTANGCCGTCTTCT
-+
-7@BC>>@55CCBCA3CBA14B.A16#*;9359B###
-@SRR290479.9 HWI-EAS285:2:1:71:795/1
-TAGCTTATCAGACTGATGTTGACATNTCGTACGCCG
-+
-BBBBBCBBCB;>AA',9=18?1:7:#<;57######
-@SRR290479.10 HWI-EAS285:2:1:71:596/1
-TTTGGCAATGGTAGAACTCCCACACNTCGTAGGCCG
-+
-B@B>7>9A@<46B@79972#################
--- a/mississippi_gcc/test-data/yac.out	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
->1
-TGTAAACATCCCCGACTGGCAGC
->2
-AAAGTGCTACTACTTTTGAGTCT
->3
-ACTGGACTTGGAGTCCGAAGGC
->4
-AAGTGCCGCCAGGTTTTGAGTGG
->5
-TATTGCACTTGTCCCGGCCTGAATCNCGT
->6
-TAGCTTATCAGACTGATGTTGAC
--- a/mississippi_gcc/tool_data_table_conf.xml.sample	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,112 +0,0 @@
-<tables>
-    <!-- Locations of all fasta files under genome directory -->
-    <table name="all_fasta" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/all_fasta.loc" />
-    </table>
-    <!-- Locations of indexes in the BFAST mapper format -->
-    <table name="bfast_indexes" comment_char="#">
-        <columns>value, dbkey, formats, name, path</columns>
-        <file path="tool-data/bfast_indexes.loc" />
-    </table>
-    <!-- Locations of nucleotide (mega)blast databases -->
-    <table name="blastdb" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/blastdb.loc" />
-    </table>
-    <!-- Locations of protein (mega)blast databases -->
-    <table name="blastdb_p" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/blastdb_p.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie mapper format -->
-    <table name="bowtie_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie_indices.loc" />
-    </table>
-    <!-- Locations of bowtie2 indexes -->
-    <table name="bowtie2_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie2_indices.loc" />
-    </table>
-    <!-- Locations of bowtie2 indeces for tophat2 -->
-    <table name="tophat2_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie2_indices.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie color-space mapper format -->
-    <table name="bowtie_indexes_color" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie_indices_color.loc" />
-    </table>
-    <!-- Locations of indexes in the BWA mapper format -->
-    <table name="bwa_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bwa_index.loc" />
-    </table>
-    <!-- Locations of indexes in the BWA color-space mapper format -->
-    <table name="bwa_indexes_color" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bwa_index_color.loc" />
-    </table>
-    <!-- Locations of MAF files that have been indexed with bx-python -->
-    <table name="indexed_maf_files">
-        <columns>name, value, dbkey, species</columns>
-        <file path="tool-data/maf_index.loc" />
-    </table>
-    <!-- Locations of fasta files appropriate for NGS simulation -->
-    <table name="ngs_sim_fasta" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/ngs_sim_fasta.loc" />
-    </table>
-    <!-- Locations of 2bit sequence files for use in Lastz -->
-    <table name="lastz_seqs" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/lastz_seqs.loc" />
-    </table>
-    <!-- Locations of PerM base index files -->
-    <table name="perm_base_indexes" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/perm_base_index.loc" />
-    </table>
-    <!-- Locations of PerM color-space index files -->
-    <table name="perm_color_indexes" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/perm_color_index.loc" />
-    </table>
-    <!-- Location of Picard dict file and other files -->
-    <table name="picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/picard_index.loc" />
-    </table>
-    <!-- Location of Picard dict files valid for GATK -->
-    <table name="gatk_picard_indexes" comment_char="#">
-        <columns>value, dbkey, name, path, tools_valid_for</columns>
-        <file path="tool-data/picard_index.loc" />
-    </table>
-    <!-- Location of SRMA dict file and other files -->
-    <table name="srma_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/picard_index.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie mapper format for TopHat to use -->
-    <table name="tophat_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie_indices.loc" />
-    </table>
-    <!-- Locations of indexes in the Bowtie color-space mapper format for TopHat to use -->
-    <table name="tophat_indexes_color" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/bowtie_indices_color.loc" />
-    </table>
-    <!-- Locations of configurations in the CCAT peak/region caller format -->
-    <table name="ccat_configurations" comment_char="#">
-        <columns>value, name, path</columns>
-        <file path="tool-data/ccat_configurations.loc" />
-    </table>
-    <!-- Location of Mosaik files -->
-    <table name="mosaik_indexes" comment_char="#">
-        <columns>value, dbkey, name, path</columns>
-        <file path="tool-data/mosaik_index.loc" />
-    </table>
-</tables>
--- a/mississippi_gcc/yac.py	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,83 +0,0 @@
-#!/usr/bin/python
-# yac = yet another clipper
-# v 1.0.0
-# Usage yac.py  $input $output $adapter_to_clip $min $max $Nmode
-# Christophe Antoniewski <drosofff@gmail.com>
-
-import sys
-
-class Clip:
-  def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
-    self.inputfile = inputfile
-    self.outputfile = outputfile
-    self.adapter = adapter
-    self.minsize = int(minsize)
-    self.maxsize = int(maxsize)
-    def motives (sequence):
-      '''return a list of motifs for a perfect (6 nt) or imperfect (7 nt with one mismatch) adapter search'''
-      sequencevariants = [sequence[0:6]] # initializes the list with the 6mer perfect match
-      dicsubst= {"A":"TGCN", "T":"AGCN", "G":"TACN", "C":"GATN"}
-      for pos, base in enumerate(sequence[:6]):
-        for subst in dicsubst[base]:
-          sequencevariants.append(sequence[:pos] + subst + sequence[pos+1:7])
-      return sequencevariants
-    self.adaptmotifs= motives(self.adapter)
-
-  def scanadapt(self, adaptmotives=[], sequence=""):
-    '''scans the sequence for adapter motifs and clips at the last occurrence found'''
-    for motif in adaptmotives:
-      index = sequence.rfind(motif)
-      if index != -1:
-        return sequence[:index]
-    return sequence
-
-  def clip_with_N (self):
-    '''clips adapter sequences from inputfile.
-    Reads containing N are retained.'''
-    line_count = 0
-    seq_id = 0
-    F = open (self.inputfile, "r")
-    O = open (self.outputfile, "w")
-    for line in F:
-      line_count += 1
-      if line_count % 4 == 2: # fastq sequence lines
-        trim = self.scanadapt (self.adaptmotifs, line.rstrip() )
-        if self.minsize <= len(trim) <= self.maxsize:
-          seq_id += 1
-          print >> O, ">%i\n%s" % (seq_id, trim)
-    F.close()
-    O.close()
-  def clip_without_N (self):
-    '''clips adapter sequences from inputfile.
-    Reads containing N are rejected.'''
-    line_count = 0
-    seq_id = 0
-    F = open (self.inputfile, "r")
-    O = open (self.outputfile, "w")
-    for line in F:
-      line_count += 1
-      if line_count % 4 == 2: # fastq sequence lines
-        trim = self.scanadapt (self.adaptmotifs, line.rstrip() )
-        if "N" in trim: continue
-        if self.minsize <= len(trim) <= self.maxsize:
-          seq_id += 1
-          print >> O, ">%i\n%s" % (seq_id, trim)
-    F.close()
-    O.close()
-
-def __main__ (inputfile, outputfile, adapter, minsize, maxsize, Nmode):
-  instanceClip = Clip (inputfile, outputfile, adapter, minsize, maxsize)
-  if Nmode == "accept":
-    instanceClip.clip_with_N()
-  else:
-    instanceClip.clip_without_N()
-
-if __name__ == "__main__" :
-  input = sys.argv[1]
-  output = sys.argv[2]
-  adapter = sys.argv[3]
-  minsize = sys.argv[4]
-  maxsize = sys.argv[5]
-  Nmode = sys.argv[6]
-  __main__(input, output, adapter, minsize, maxsize, Nmode)
--- a/mississippi_gcc/yac.xml	Tue Jun 24 10:58:46 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
- <tool id="yac" name="Clip adapter" version="1.0.0">
-  <description></description>
-  <command interpreter="python">yac.py $input $output $clip_source.clip_sequence $min $max $Nmode</command>
-  <inputs>
-    <param format="fastq" name="input" type="data" label="Source file"/>
-    <param name="min" type="integer" size="4" value="15" label="min size"/>
-    <param name="max" type="integer" size="4" value="36" label="max size"/>
-    <param name="Nmode" type="select" label="Reads containing Nst">
-        <option value="accept" selected="True">accept</option>
-        <option value="reject">reject</option>
-    </param>
-    <conditional name="clip_source">
-   	 <param name="clip_source_list" type="select" label="Source" help="Built-in adapters or User-provided">
-         	<option value="prebuilt" selected="True">Use a built-in adapter (select from the list below)</option>
-         	<option value="user">Use custom sequence</option>
-         </param>
-         <when value="prebuilt">
-         	<param name="clip_sequence" type="select" label="Select Adapter to clip" help="if your adapter is not listed, input your own sequence">
-                	<option value="TCGTATGCCGTCTTCTGCTTG">Solexa TCGTATGCCGTCTTCTGCTTG</option>
-                        <option value="ATCTCGTATGCCGTCTTCTGCTT">Illumina ATCTCGTATGCCGTCTTCTGCTT</option>
-                        <option value="TGGAATTCTCGGGTGCCAAG" selected="True">Illumina TruSeq  TGGAATTCTCGGGTGCCAAG</option>
-                        <option value="CTGTAGGCACCATCAATCGT">IdT CTGTAGGCACCATCAATCGT</option>
-		</param>
-	</when>
-        <when value="user">
-		 <param name="clip_sequence" type="text" size="35"  label="Enter your Sequence" value="GAATCC"/>
-	</when>
-    </conditional>
-  </inputs>
-  <outputs>
-    <data format="fasta" name="output"  metadata="input" />
-  </outputs>
-
-  <help>
-This tool clips adapter sequences from a fastq file and returns a fasta file of the clipped reads with renumbered fasta headers.
-
-Clipped sequences with Ns can be discarded.
-
-Min size and max size filter the clipped reads by length.
-
-Note that unclipped reads that satisfy the min and max size conditions are kept.
-  </help>
-
-  <tests>
-    <test>
-      <param name="input" value="yac.fastq" ftype="fastqsanger"/>
-      <param name="min" value="18" />
-      <param name="max" value="29" />
-      <param name="clip_source_list" value="prebuilt" />
-      <param name="clip_sequence" value="ATCTCGTATGCCGTCTTCTGCTT" />
-      <param name="Nmode" value="accept" />
-      <output name="output" file="yac.out" />
-    </test>
-  </tests>
-
-</tool>