changeset 5:ad813be00215 draft default tip

Uploaded
author drosofff
date Sat, 31 May 2014 15:12:15 -0400
parents 2f536ef15f49
children
files YAC/test-data/yac.fastq YAC/test-data/yac.out YAC/yac.py YAC/yac.xml test-data/yac.fastq test-data/yac.out
diffstat 6 files changed, 193 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/YAC/test-data/yac.fastq	Sat May 31 15:12:15 2014 -0400
@@ -0,0 +1,40 @@
+@SRR290479.1 HWI-EAS285:2:1:66:28/1
+TGTAAACATCCCCGACTGGCAGCATNTCGTATGCCG
++
+B@BBCBCCBCBCCC8A<@##################
+@SRR290479.2 HWI-EAS285:2:1:67:348/1
+AAAGTGCTACTACTTTTGAGTCTATNTCGTACGCCG
++
+BAA@7?A@@A@@B<'25?6>59:;7#<?########
+@SRR290479.3 HWI-EAS285:2:1:68:826/1
+TAGCTTATCAGACTGATGTTGACACNTCGTATGCCG
++
+BB@BBCCBCCBBB:%%83/>B7@44#;;324'117?
+@SRR290479.4 HWI-EAS285:2:1:68:65/1
+ACTGGACTTGGAGTCCGAAGGCATCNCGTATTCCGT
++
+BBB@@ABAAB?9B42&9;##################
+@SRR290479.5 HWI-EAS285:2:1:69:594/1
+AAGTGCCGCCAGGTTTTGAGTGGATNTCGTATGGCG
++
+AB?5;3>/=?>=;416481#################
+@SRR290479.6 HWI-EAS285:2:1:70:700/1
+TATTGCACTTGTCCCGGCCTGAATCNCGTATCCCGT
++
+BCB=:ACCBB=>BB8<-###################
+@SRR290479.7 HWI-EAS285:2:1:70:1679/1
+TGGTAGACTATGGAACGTAGGATCTNGCATGCCGCC
++
+BCBBCCBCCCBCCA?AB>:B@><>############
+@SRR290479.8 HWI-EAS285:2:1:71:1400/1
+AGTGGTAGAGCATTTGAATCTCGTANGCCGTCTTCT
++
+7@BC>>@55CCBCA3CBA14B.A16#*;9359B###
+@SRR290479.9 HWI-EAS285:2:1:71:795/1
+TAGCTTATCAGACTGATGTTGACATNTCGTACGCCG
++
+BBBBBCBBCB;>AA',9=18?1:7:#<;57######
+@SRR290479.10 HWI-EAS285:2:1:71:596/1
+TTTGGCAATGGTAGAACTCCCACACNTCGTAGGCCG
++
+B@B>7>9A@<46B@79972#################
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/YAC/test-data/yac.out	Sat May 31 15:12:15 2014 -0400
@@ -0,0 +1,12 @@
+>1
+TGTAAACATCCCCGACTGGCAGC
+>2
+AAAGTGCTACTACTTTTGAGTCT
+>3
+ACTGGACTTGGAGTCCGAAGGC
+>4
+AAGTGCCGCCAGGTTTTGAGTGG
+>5
+TATTGCACTTGTCCCGGCCTGAATCNCGT
+>6
+TAGCTTATCAGACTGATGTTGAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/YAC/yac.py	Sat May 31 15:12:15 2014 -0400
@@ -0,0 +1,83 @@
+#!/usr/bin/python
+# yac = yet another clipper
+# v 1.0.0
+# Usage yac.py  $input $output $adapter_to_clip $min $max $Nmode
+# Christophe Antoniewski <drosofff@gmail.com>
+
+import sys, string
+
+class Clip:
+  def __init__(self, inputfile, outputfile, adapter, minsize, maxsize):
+    self.inputfile = inputfile
+    self.outputfile = outputfile
+    self.adapter = adapter
+    self.minsize = int(minsize)
+    self.maxsize = int(maxsize)
+    def motives (sequence):
+      '''return a list of motives for perfect (6nt) or imperfect (7nt with one mismatch) search on import string module'''
+      sequencevariants = [sequence[0:6]] # initializes the list with the 6mer perfect match
+      dicsubst= {"A":"TGCN", "T":"AGCN", "G":"TACN", "C":"GATN"}
+      for pos in enumerate(sequence[:6]):
+        for subst in dicsubst[pos[1]]:
+          sequencevariants.append(sequence[:pos[0]]+ subst + sequence[pos[0]+1:7])
+      return sequencevariants
+    self.adaptmotifs= motives(self.adapter)
+
+  def scanadapt(self, adaptmotives=[], sequence=""):
+    '''scans sequence for adapter motives'''
+    if sequence.rfind(adaptmotives[0]) != -1:
+      return sequence[:sequence.rfind(adaptmotives[0])]
+    for motif in adaptmotives[1:]:
+      if sequence.rfind(motif) != -1:
+        return sequence[:sequence.rfind(motif)]
+    return sequence
+
+  def clip_with_N (self):
+    '''clips adapter sequences from inputfile. 
+    Reads containing N are retained.'''
+    iterator = 0
+    id = 0
+    F = open (self.inputfile, "r")
+    O = open (self.outputfile, "w")
+    for line in F:
+      iterator += 1
+      if iterator % 4 == 2:
+        trim = self.scanadapt (self.adaptmotifs, line.rstrip() )
+        if self.minsize <= len(trim) <= self.maxsize:
+          id += 1
+          print >> O, ">%i\n%s" % (id, trim)
+    F.close()
+    O.close()
+  def clip_without_N (self):
+    '''clips adapter sequences from inputfile. 
+    Reads containing N are rejected.'''
+    iterator = 0
+    id = 0
+    F = open (self.inputfile, "r")
+    O = open (self.outputfile, "w")
+    for line in F:
+      iterator += 1
+      if iterator % 4 == 2:
+        trim = self.scanadapt (self.adaptmotifs, line.rstrip() )
+        if "N" in trim: continue
+        if self.minsize <= len(trim) <= self.maxsize:
+          id += 1
+          print >> O, ">%i\n%s" % (id, trim)
+    F.close()
+    O.close()
+
+def __main__ (inputfile, outputfile, adapter, minsize, maxsize, Nmode):
+  instanceClip = Clip (inputfile, outputfile, adapter, minsize, maxsize)
+  if Nmode == "accept":
+    instanceClip.clip_with_N()
+  else:
+    instanceClip.clip_without_N()
+
+if __name__ == "__main__" :
+  input = sys.argv[1]
+  output = sys.argv[2]
+  adapter = sys.argv[3]
+  minsize = sys.argv[4]
+  maxsize = sys.argv[5]
+  Nmode = sys.argv[6]
+  __main__(input, output, adapter, minsize, maxsize, Nmode)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/YAC/yac.xml	Sat May 31 15:12:15 2014 -0400
@@ -0,0 +1,58 @@
+ <tool id="yac" name="Clip adapter" version="1.0.0">
+  <description></description>
+  <command interpreter="python">yac.py $input $output $clip_source.clip_sequence $min $max $Nmode</command>
+  <inputs>
+    <param format="fastq" name="input" type="data" label="Source file"/>
+    <param name="min" type="integer" size="4" value="15" label="min size"/>
+    <param name="max" type="integer" size="4" value="36" label="max size"/>
+    <param name="Nmode" type="select" label="Reads containing Nst">
+        <option value="accept" selected="True">accept</option>
+        <option value="reject">reject</option>
+    </param>
+    <conditional name="clip_source">
+   	 <param name="clip_source_list" type="select" label="Source" help="Built-in adapters or User-provided">
+         	<option value="prebuilt" selected="True">Use a built-in adapter (select from the list below)</option>
+         	<option value="user">Use custom sequence</option>
+         </param>
+         <when value="prebuilt">
+         	<param name="clip_sequence" type="select" label="Select Adapter to clip" help="if your adapter is not listed, input your own sequence">
+                	<option value="TCGTATGCCGTCTTCTGCTTG">Solexa TCGTATGCCGTCTTCTGCTTG</option>
+                        <option value="ATCTCGTATGCCGTCTTCTGCTT">Illumina ATCTCGTATGCCGTCTTCTGCTT</option>
+                        <option value="TGGAATTCTCGGGTGCCAAG" selected="True">Illumina TruSeq  TGGAATTCTCGGGTGCCAAG</option>
+                        <option value="CTGTAGGCACCATCAATCGT">IdT CTGTAGGCACCATCAATCGT</option>
+		</param>
+	</when>
+        <when value="user">
+		 <param name="clip_sequence" type="text" size="35"  label="Enter your Sequence" value="GAATCC"/>
+	</when>
+    </conditional>
+  </inputs>
+  <outputs>
+    <data format="fasta" name="output"  metadata="input" />
+  </outputs>
+
+  <help>
+<!-- write a decent doc ! -->
+This tool clips adapter sequences from a fastq file and fasta file of clipped reads with renumbered fasta headers.
+
+Clipped sequences with Ns can be discarded.
+
+Min size and max size filter clipped reads on their size.
+
+Note that unclipped reads that satisfy the min and max size conditions are kept.
+  </help>
+
+<!-- write a <test> section -->
+	<tests>
+                <test>
+                        <param name="input" value="yac.fastq" ftype="fastqsanger"/>
+                        <param name="min" value="18" />
+                        <param name="max" value="29" />
+                        <param name="clip_source_list" value="prebuilt" />
+                        <param name="clip_sequence" value="ATCTCGTATGCCGTCTTCTGCTT" />
+                        <param name="Nmode" value="accept" />
+                        <output name="output" file="yac.out" />
+                </test>
+        </tests>
+
+</tool>
--- a/test-data/yac.fastq	Sat May 17 18:41:24 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-@SRR290479.1 HWI-EAS285:2:1:66:28/1
-TGTAAACATCCCCGACTGGCAGCATNTCGTATGCCG
-+
-B@BBCBCCBCBCCC8A<@##################
-@SRR290479.2 HWI-EAS285:2:1:67:348/1
-AAAGTGCTACTACTTTTGAGTCTATNTCGTACGCCG
-+
-BAA@7?A@@A@@B<'25?6>59:;7#<?########
-@SRR290479.3 HWI-EAS285:2:1:68:826/1
-TAGCTTATCAGACTGATGTTGACACNTCGTATGCCG
-+
-BB@BBCCBCCBBB:%%83/>B7@44#;;324'117?
-@SRR290479.4 HWI-EAS285:2:1:68:65/1
-ACTGGACTTGGAGTCCGAAGGCATCNCGTATTCCGT
-+
-BBB@@ABAAB?9B42&9;##################
-@SRR290479.5 HWI-EAS285:2:1:69:594/1
-AAGTGCCGCCAGGTTTTGAGTGGATNTCGTATGGCG
-+
-AB?5;3>/=?>=;416481#################
-@SRR290479.6 HWI-EAS285:2:1:70:700/1
-TATTGCACTTGTCCCGGCCTGAATCNCGTATCCCGT
-+
-BCB=:ACCBB=>BB8<-###################
-@SRR290479.7 HWI-EAS285:2:1:70:1679/1
-TGGTAGACTATGGAACGTAGGATCTNGCATGCCGCC
-+
-BCBBCCBCCCBCCA?AB>:B@><>############
-@SRR290479.8 HWI-EAS285:2:1:71:1400/1
-AGTGGTAGAGCATTTGAATCTCGTANGCCGTCTTCT
-+
-7@BC>>@55CCBCA3CBA14B.A16#*;9359B###
-@SRR290479.9 HWI-EAS285:2:1:71:795/1
-TAGCTTATCAGACTGATGTTGACATNTCGTACGCCG
-+
-BBBBBCBBCB;>AA',9=18?1:7:#<;57######
-@SRR290479.10 HWI-EAS285:2:1:71:596/1
-TTTGGCAATGGTAGAACTCCCACACNTCGTAGGCCG
-+
-B@B>7>9A@<46B@79972#################
--- a/test-data/yac.out	Sat May 17 18:41:24 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
->1
-TGTAAACATCCCCGACTGGCAGC
->2
-AAAGTGCTACTACTTTTGAGTCT
->3
-ACTGGACTTGGAGTCCGAAGGC
->4
-AAGTGCCGCCAGGTTTTGAGTGG
->5
-TATTGCACTTGTCCCGGCCTGAATCNCGT
->6
-TAGCTTATCAGACTGATGTTGAC