Mercurial > repos > drosofff > yet_another_clipper
changeset 4:2f536ef15f49 draft
Deleted selected files
author | drosofff |
---|---|
date | Sat, 17 May 2014 18:41:24 -0400 |
parents | 222f48abaac4 |
children | ad813be00215 |
files | test-data/yac.fastq test-data/yac.out yac.py yac.xml |
diffstat | 4 files changed, 52 insertions(+), 136 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/yac.fastq Sat May 17 18:41:24 2014 -0400 @@ -0,0 +1,40 @@ +@SRR290479.1 HWI-EAS285:2:1:66:28/1 +TGTAAACATCCCCGACTGGCAGCATNTCGTATGCCG ++ +B@BBCBCCBCBCCC8A<@################## +@SRR290479.2 HWI-EAS285:2:1:67:348/1 +AAAGTGCTACTACTTTTGAGTCTATNTCGTACGCCG ++ +BAA@7?A@@A@@B<'25?6>59:;7#<?######## +@SRR290479.3 HWI-EAS285:2:1:68:826/1 +TAGCTTATCAGACTGATGTTGACACNTCGTATGCCG ++ +BB@BBCCBCCBBB:%%83/>B7@44#;;324'117? +@SRR290479.4 HWI-EAS285:2:1:68:65/1 +ACTGGACTTGGAGTCCGAAGGCATCNCGTATTCCGT ++ +BBB@@ABAAB?9B42&9;################## +@SRR290479.5 HWI-EAS285:2:1:69:594/1 +AAGTGCCGCCAGGTTTTGAGTGGATNTCGTATGGCG ++ +AB?5;3>/=?>=;416481################# +@SRR290479.6 HWI-EAS285:2:1:70:700/1 +TATTGCACTTGTCCCGGCCTGAATCNCGTATCCCGT ++ +BCB=:ACCBB=>BB8<-################### +@SRR290479.7 HWI-EAS285:2:1:70:1679/1 +TGGTAGACTATGGAACGTAGGATCTNGCATGCCGCC ++ +BCBBCCBCCCBCCA?AB>:B@><>############ +@SRR290479.8 HWI-EAS285:2:1:71:1400/1 +AGTGGTAGAGCATTTGAATCTCGTANGCCGTCTTCT ++ +7@BC>>@55CCBCA3CBA14B.A16#*;9359B### +@SRR290479.9 HWI-EAS285:2:1:71:795/1 +TAGCTTATCAGACTGATGTTGACATNTCGTACGCCG ++ +BBBBBCBBCB;>AA',9=18?1:7:#<;57###### +@SRR290479.10 HWI-EAS285:2:1:71:596/1 +TTTGGCAATGGTAGAACTCCCACACNTCGTAGGCCG ++ +B@B>7>9A@<46B@79972#################
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/yac.out Sat May 17 18:41:24 2014 -0400 @@ -0,0 +1,12 @@ +>1 +TGTAAACATCCCCGACTGGCAGC +>2 +AAAGTGCTACTACTTTTGAGTCT +>3 +ACTGGACTTGGAGTCCGAAGGC +>4 +AAGTGCCGCCAGGTTTTGAGTGG +>5 +TATTGCACTTGTCCCGGCCTGAATCNCGT +>6 +TAGCTTATCAGACTGATGTTGAC
--- a/yac.py Sat May 17 18:22:46 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,78 +0,0 @@ -#!/usr/bin/python -# yac = yet another clipper -# v 0.9.1 -# Usage yac.py $input $output $adapter_to_clip $min $max $Nmode -# Christophe Antoniewski <drosofff@gmail.com> - -import sys, string - -class Clip: - def __init__(self, inputfile, outputfile, adapter, minsize, maxsize): - self.inputfile = inputfile - self.outputfile = outputfile - self.adapter = adapter - self.minsize = int(minsize) - self.maxsize = int(maxsize) - def motives (sequence): - '''return a list of motives for perfect (6nt) or imperfect (7nt with one mismatch) search on import string module''' - sequencevariants = [sequence[0:6]] # initializes the list with the 6mer perfect match - dicsubst= {"A":"TGCN", "T":"AGCN", "G":"TACN", "C":"GATN"} - for pos in enumerate(sequence[:6]): - for subst in dicsubst[pos[1]]: - sequencevariants.append(sequence[:pos[0]]+ subst + sequence[pos[0]+1:7]) - return sequencevariants - self.adaptmotifs= motives(self.adapter) - - def scanadapt(self, adaptmotives=[], sequence=""): - if sequence.rfind(adaptmotives[0]) != -1: - return sequence[:sequence.rfind(adaptmotives[0])] - for motif in adaptmotives[1:]: - if sequence.rfind(motif) != -1: - return sequence[:sequence.rfind(motif)] - return sequence - - def clip_with_N (self): - iterator = 0 - id = 0 - F = open (self.inputfile, "r") - O = open (self.outputfile, "w") - for line in F: - iterator += 1 - if iterator % 4 == 2: - trim = self.scanadapt (self.adaptmotifs, line.rstrip() ) - if self.minsize <= len(trim) <= self.maxsize: - id += 1 - print >> O, ">%i\n%s" % (id, trim) - F.close() - O.close() - def clip_without_N (self): - iterator = 0 - id = 0 - F = open (self.inputfile, "r") - O = open (self.outputfile, "w") - for line in F: - iterator += 1 - if iterator % 4 == 2: - trim = self.scanadapt (self.adaptmotifs, line.rstrip() ) - if "N" in trim: continue - if self.minsize <= len(trim) <= self.maxsize: - id += 1 - print >> O, ">%i\n%s" % (id, trim) - F.close() - O.close() - -def __main__ (inputfile, outputfile, adapter, minsize, maxsize, Nmode): - instanceClip = Clip (inputfile, outputfile, adapter, minsize, maxsize) - if Nmode == "accepted": - instanceClip.clip_with_N() - else: - instanceClip.clip_without_N() - -if __name__ == "__main__" : - input = sys.argv[1] - output = sys.argv[2] - adapter = sys.argv[3] - minsize = sys.argv[4] - maxsize = sys.argv[5] - Nmode = sys.argv[6] - __main__(input, output, adapter, minsize, maxsize, Nmode)
--- a/yac.xml Sat May 17 18:22:46 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ - <tool id="yac" name="Clip adapter"> - <description></description> - <command interpreter="python">yac.py $input $output $clip_source.clip_sequence $min $max $Nmode</command> - <inputs> - <param format="fastq" name="input" type="data" label="Source file"/> - <param name="min" type="integer" size="4" value="15" label="min size"/> - <param name="max" type="integer" size="4" value="36" label="max size"/> - <param name="Nmode" type="select" label="Reads containing Nst"> - <option value="accepted" selected="True">Accepted</option> - <option value="rejected">Rejected</option> - </param> - <conditional name="clip_source"> - <param name="clip_source_list" type="select" label="Source" help="Built-in adapters or User-provided"> - <option value="prebuilt" selected="True">Use a built-in adapter (select from the list below)</option> - <option value="user">Use custom sequence</option> - </param> - <when value="prebuilt"> - <param name="clip_sequence" type="select" label="Select Adapter to clip" help="if your adapter is not listed, input your own sequence"> - <option value="TCGTATGCCGTCTTCTGCTTG">Solexa TCGTATGCCGTCTTCTGCTTG</option> - <option value="ATCTCGTATGCCGTCTTCTGCTT">Illumina ATCTCGTATGCCGTCTTCTGCTT</option> - <option value="TGGAATTCTCGGGTGCCAAG" selected="True">Illumina TruSeq TGGAATTCTCGGGTGCCAAG</option> - <option value="CTGTAGGCACCATCAATCGT">IdT CTGTAGGCACCATCAATCGT</option> - </param> - </when> - <when value="user"> - <param name="clip_sequence" type="text" size="35" label="Enter your Sequence" value="GAATCC"/> - </when> - </conditional> - </inputs> - <outputs> - <data format="fasta" name="output" metadata="input" /> - </outputs> - - <help> -<!-- write a decent doc ! --> -This tool clips adapter sequences from a fastq file and fasta file of clipped reads with renumbered fasta headers. - -Clipped sequences with Ns can be discarded. - -Min size and max size filter clipped reads on their size. - -Note that unclipped reads that satisfy the min and max size conditions are kept. - </help> - -<!-- write a <test> section --> - <tests> - <test> - <param name="input" value="yac.fastq" ftype="fastqsanger"/> - <param name="min" value="18" /> - <param name="max" value="29" /> - <param name="clip_source_list" value="prebuilt" /> - <param name="clip_sequence" value="ATCTCGTATGCCGTCTTCTGCTT" /> - <param name="Nmode" value="accepted" /> - <output name="output" file="yac.out" /> - </test> - </tests> - -</tool>